hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject [07/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley reviewed by prasanthj)
Date Fri, 11 Dec 2015 23:28:04 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
index 99a3e8d..2c9deac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
@@ -20,16 +20,16 @@ package org.apache.hadoop.hive.ql.exec;
 import java.io.IOException;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.orc.CompressionKind;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper;
 import org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
-import org.apache.hadoop.hive.ql.io.orc.Writer;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
index 56aec9f..878efbe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
@@ -18,7 +18,7 @@
 
 package org.apache.hadoop.hive.ql.io.filters;
 
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.orc.OrcProto;
 import org.apache.hive.common.util.BloomFilter;
 
 import com.google.common.primitives.Longs;

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
deleted file mode 100644
index 23030a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
deleted file mode 100644
index ec1f0a9..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-
-public class BitFieldReader {
-  private final RunLengthByteReader input;
-  /** The number of bits in one item. Non-test code always uses 1. */
-  private final int bitSize;
-  private int current;
-  private int bitsLeft;
-  private final int mask;
-
-  public BitFieldReader(InStream input,
-      int bitSize) throws IOException {
-    this.input = new RunLengthByteReader(input);
-    this.bitSize = bitSize;
-    mask = (1 << bitSize) - 1;
-  }
-
-  public void setInStream(InStream inStream) {
-    this.input.setInStream(inStream);
-  }
-
-  private void readByte() throws IOException {
-    if (input.hasNext()) {
-      current = 0xff & input.next();
-      bitsLeft = 8;
-    } else {
-      throw new EOFException("Read past end of bit field from " + this);
-    }
-  }
-
-  public int next() throws IOException {
-    int result = 0;
-    int bitsLeftToRead = bitSize;
-    while (bitsLeftToRead > bitsLeft) {
-      result <<= bitsLeft;
-      result |= current & ((1 << bitsLeft) - 1);
-      bitsLeftToRead -= bitsLeft;
-      readByte();
-    }
-    if (bitsLeftToRead > 0) {
-      result <<= bitsLeftToRead;
-      bitsLeft -= bitsLeftToRead;
-      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-    }
-    return result & mask;
-  }
-
-  /**
-   * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
-   * to figure out whether we have a run. Given that runs in booleans are likely it's worth it.
-   * However it means we'd need to keep track of how many bytes we read, and next/nextVector won't
-   * work anymore once this is called. These is trivial to fix, but these are never interspersed.
-   */
-  private boolean lastRunValue;
-  private int lastRunLength = -1;
-  private void readNextRun(int maxRunLength) throws IOException {
-    assert bitSize == 1;
-    if (lastRunLength > 0) return; // last run is not exhausted yet
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    // First take care of the partial bits.
-    boolean hasVal = false;
-    int runLength = 0;
-    if (bitsLeft != 8) {
-      int partialBitsMask = (1 << bitsLeft) - 1;
-      int partialBits = current & partialBitsMask;
-      if (partialBits == partialBitsMask || partialBits == 0) {
-        lastRunValue = (partialBits == partialBitsMask);
-        if (maxRunLength <= bitsLeft) {
-          lastRunLength = maxRunLength;
-          return;
-        }
-        maxRunLength -= bitsLeft;
-        hasVal = true;
-        runLength = bitsLeft;
-        bitsLeft = 0;
-      } else {
-        // There's no run in partial bits. Return whatever we have.
-        int prefixBitsCount = 32 - bitsLeft;
-        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
-        lastRunValue = (runLength > 0);
-        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
-          (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
-        return;
-      }
-      assert bitsLeft == 0;
-      readByte();
-    }
-    if (!hasVal) {
-      lastRunValue = ((current >> 7) == 1);
-      hasVal = true;
-    }
-    // Read full bytes until the run ends.
-    assert bitsLeft == 8;
-    while (maxRunLength >= 8
-        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
-      runLength += 8;
-      maxRunLength -= 8;
-      readByte();
-    }
-    if (maxRunLength > 0) {
-      int extraBits = Integer.numberOfLeadingZeros(
-          lastRunValue ? (~(current | ~255)) : current) - 24;
-      bitsLeft -= extraBits;
-      runLength += extraBits;
-    }
-    lastRunLength = runLength;
-  }
-
-  void nextVector(LongColumnVector previous, long previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (!previous.isNull[i]) {
-        previous.vector[i] = next();
-      } else {
-        // The default value of null for int types in vectorized
-        // processing is 1, so set that if the value is null
-        previous.vector[i] = 1;
-      }
-
-      // The default value for nulls in Vectorization for int types is 1
-      // and given that non null value can also be 1, we need to check for isNull also
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed > 8) {
-      throw new IllegalArgumentException("Seek past end of byte at " +
-          consumed + " in " + input);
-    } else if (consumed != 0) {
-      readByte();
-      bitsLeft = 8 - consumed;
-    } else {
-      bitsLeft = 0;
-    }
-  }
-
-  void skip(long items) throws IOException {
-    long totalBits = bitSize * items;
-    if (bitsLeft >= totalBits) {
-      bitsLeft -= totalBits;
-    } else {
-      totalBits -= bitsLeft;
-      input.skip(totalBits / 8);
-      current = input.next();
-      bitsLeft = (int) (8 - (totalBits % 8));
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "bit reader current: " + current + " bits left: " + bitsLeft +
-        " bit size: " + bitSize + " from " + input;
-  }
-
-  boolean hasFullByte() {
-    return bitsLeft == 8 || bitsLeft == 0;
-  }
-
-  int peekOneBit() throws IOException {
-    assert bitSize == 1;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return (current >>> (bitsLeft - 1)) & 1;
-  }
-
-  int peekFullByte() throws IOException {
-    assert bitSize == 1;
-    assert bitsLeft == 8 || bitsLeft == 0;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return current;
-  }
-
-  void skipInCurrentByte(int bits) throws IOException {
-    assert bitsLeft >= bits;
-    bitsLeft -= bits;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
deleted file mode 100644
index 0608da6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-
-class BitFieldWriter {
-  private RunLengthByteWriter output;
-  private final int bitSize;
-  private byte current = 0;
-  private int bitsLeft = 8;
-
-  BitFieldWriter(PositionedOutputStream output,
-                 int bitSize) throws IOException {
-    this.output = new RunLengthByteWriter(output);
-    this.bitSize = bitSize;
-  }
-
-  private void writeByte() throws IOException {
-    output.write(current);
-    current = 0;
-    bitsLeft = 8;
-  }
-
-  void flush() throws IOException {
-    if (bitsLeft != 8) {
-      writeByte();
-    }
-    output.flush();
-  }
-
-  void write(int value) throws IOException {
-    int bitsToWrite = bitSize;
-    while (bitsToWrite > bitsLeft) {
-      // add the bits to the bottom of the current word
-      current |= value >>> (bitsToWrite - bitsLeft);
-      // subtract out the bits we just added
-      bitsToWrite -= bitsLeft;
-      // zero out the bits above bitsToWrite
-      value &= (1 << bitsToWrite) - 1;
-      writeByte();
-    }
-    bitsLeft -= bitsToWrite;
-    current |= value << bitsLeft;
-    if (bitsLeft == 0) {
-      writeByte();
-    }
-  }
-
-  void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(8 - bitsLeft);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
deleted file mode 100644
index 6d03998..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
-  long getFalseCount();
-
-  long getTrueCount();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
deleted file mode 100644
index 7c973c2..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
-  /**
-   * Get the number of values in this column. It will differ from the number
-   * of rows because of NULL values and repeated values.
-   * @return the number of values
-   */
-  long getNumberOfValues();
-
-  /**
-   * Returns true if there are nulls in the scope of column statistics.
-   * @return true if null present else false
-   */
-  boolean hasNull();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
deleted file mode 100644
index bcca9de..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1082 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
-
-class ColumnStatisticsImpl implements ColumnStatistics {
-
-  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
-      implements BooleanColumnStatistics {
-    private long trueCount = 0;
-
-    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
-      trueCount = bkt.getCount(0);
-    }
-
-    BooleanStatisticsImpl() {
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      trueCount = 0;
-    }
-
-    @Override
-    void updateBoolean(boolean value, int repetitions) {
-      if (value) {
-        trueCount += repetitions;
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BooleanStatisticsImpl) {
-        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
-        trueCount += bkt.trueCount;
-      } else {
-        if (isStatsExists() && trueCount != 0) {
-          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.BucketStatistics.Builder bucket =
-        OrcProto.BucketStatistics.newBuilder();
-      bucket.addCount(trueCount);
-      builder.setBucketStatistics(bucket);
-      return builder;
-    }
-
-    @Override
-    public long getFalseCount() {
-      return getNumberOfValues() - trueCount;
-    }
-
-    @Override
-    public long getTrueCount() {
-      return trueCount;
-    }
-
-    @Override
-    public String toString() {
-      return super.toString() + " true: " + trueCount;
-    }
-  }
-
-  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
-      implements IntegerColumnStatistics {
-
-    private long minimum = Long.MAX_VALUE;
-    private long maximum = Long.MIN_VALUE;
-    private long sum = 0;
-    private boolean hasMinimum = false;
-    private boolean overflow = false;
-
-    IntegerStatisticsImpl() {
-    }
-
-    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
-      if (intStat.hasMinimum()) {
-        hasMinimum = true;
-        minimum = intStat.getMinimum();
-      }
-      if (intStat.hasMaximum()) {
-        maximum = intStat.getMaximum();
-      }
-      if (intStat.hasSum()) {
-        sum = intStat.getSum();
-      } else {
-        overflow = true;
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Long.MAX_VALUE;
-      maximum = Long.MIN_VALUE;
-      sum = 0;
-      overflow = false;
-    }
-
-    @Override
-    void updateInteger(long value, int repetitions) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      if (!overflow) {
-        boolean wasPositive = sum >= 0;
-        sum += value * repetitions;
-        if ((value >= 0) == wasPositive) {
-          overflow = (sum >= 0) != wasPositive;
-        }
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof IntegerStatisticsImpl) {
-        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = otherInt.hasMinimum;
-          minimum = otherInt.minimum;
-          maximum = otherInt.maximum;
-        } else if (otherInt.hasMinimum) {
-          if (otherInt.minimum < minimum) {
-            minimum = otherInt.minimum;
-          }
-          if (otherInt.maximum > maximum) {
-            maximum = otherInt.maximum;
-          }
-        }
-
-        overflow |= otherInt.overflow;
-        if (!overflow) {
-          boolean wasPositive = sum >= 0;
-          sum += otherInt.sum;
-          if ((otherInt.sum >= 0) == wasPositive) {
-            overflow = (sum >= 0) != wasPositive;
-          }
-        }
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.IntegerStatistics.Builder intb =
-        OrcProto.IntegerStatistics.newBuilder();
-      if (hasMinimum) {
-        intb.setMinimum(minimum);
-        intb.setMaximum(maximum);
-      }
-      if (!overflow) {
-        intb.setSum(sum);
-      }
-      builder.setIntStatistics(intb);
-      return builder;
-    }
-
-    @Override
-    public long getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public long getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public boolean isSumDefined() {
-      return !overflow;
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      if (!overflow) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
-       implements DoubleColumnStatistics {
-    private boolean hasMinimum = false;
-    private double minimum = Double.MAX_VALUE;
-    private double maximum = Double.MIN_VALUE;
-    private double sum = 0;
-
-    DoubleStatisticsImpl() {
-    }
-
-    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
-      if (dbl.hasMinimum()) {
-        hasMinimum = true;
-        minimum = dbl.getMinimum();
-      }
-      if (dbl.hasMaximum()) {
-        maximum = dbl.getMaximum();
-      }
-      if (dbl.hasSum()) {
-        sum = dbl.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Double.MAX_VALUE;
-      maximum = Double.MIN_VALUE;
-      sum = 0;
-    }
-
-    @Override
-    void updateDouble(double value) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      sum += value;
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DoubleStatisticsImpl) {
-        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = dbl.hasMinimum;
-          minimum = dbl.minimum;
-          maximum = dbl.maximum;
-        } else if (dbl.hasMinimum) {
-          if (dbl.minimum < minimum) {
-            minimum = dbl.minimum;
-          }
-          if (dbl.maximum > maximum) {
-            maximum = dbl.maximum;
-          }
-        }
-        sum += dbl.sum;
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of double column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.DoubleStatistics.Builder dbl =
-        OrcProto.DoubleStatistics.newBuilder();
-      if (hasMinimum) {
-        dbl.setMinimum(minimum);
-        dbl.setMaximum(maximum);
-      }
-      dbl.setSum(sum);
-      builder.setDoubleStatistics(dbl);
-      return builder;
-    }
-
-    @Override
-    public double getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public double getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public double getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      buf.append(" sum: ");
-      buf.append(sum);
-      return buf.toString();
-    }
-  }
-
-  protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
-      implements StringColumnStatistics {
-    private Text minimum = null;
-    private Text maximum = null;
-    private long sum = 0;
-
-    StringStatisticsImpl() {
-    }
-
-    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.StringStatistics str = stats.getStringStatistics();
-      if (str.hasMaximum()) {
-        maximum = new Text(str.getMaximum());
-      }
-      if (str.hasMinimum()) {
-        minimum = new Text(str.getMinimum());
-      }
-      if(str.hasSum()) {
-        sum = str.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = 0;
-    }
-
-    @Override
-    void updateString(Text value) {
-      if (minimum == null) {
-        maximum = minimum = new Text(value);
-      } else if (minimum.compareTo(value) > 0) {
-        minimum = new Text(value);
-      } else if (maximum.compareTo(value) < 0) {
-        maximum = new Text(value);
-      }
-      sum += value.getLength();
-    }
-
-    @Override
-    void updateString(byte[] bytes, int offset, int length, int repetitions) {
-      if (minimum == null) {
-        maximum = minimum = new Text();
-        maximum.set(bytes, offset, length);
-      } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
-          minimum.getLength(), bytes, offset, length) > 0) {
-        minimum = new Text();
-        minimum.set(bytes, offset, length);
-      } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
-          maximum.getLength(), bytes, offset, length) < 0) {
-        maximum = new Text();
-        maximum.set(bytes, offset, length);
-      }
-      sum += length * repetitions;
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof StringStatisticsImpl) {
-        StringStatisticsImpl str = (StringStatisticsImpl) other;
-        if (minimum == null) {
-          if (str.minimum != null) {
-            maximum = new Text(str.getMaximum());
-            minimum = new Text(str.getMinimum());
-          } else {
-          /* both are empty */
-            maximum = minimum = null;
-          }
-        } else if (str.minimum != null) {
-          if (minimum.compareTo(str.minimum) > 0) {
-            minimum = new Text(str.getMinimum());
-          }
-          if (maximum.compareTo(str.maximum) < 0) {
-            maximum = new Text(str.getMaximum());
-          }
-        }
-        sum += str.sum;
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of string column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.StringStatistics.Builder str =
-        OrcProto.StringStatistics.newBuilder();
-      if (getNumberOfValues() != 0) {
-        str.setMinimum(getMinimum());
-        str.setMaximum(getMaximum());
-        str.setSum(sum);
-      }
-      result.setStringStatistics(str);
-      return result;
-    }
-
-    @Override
-    public String getMinimum() {
-      return minimum == null ? null : minimum.toString();
-    }
-
-    @Override
-    public String getMaximum() {
-      return maximum == null ? null : maximum.toString();
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
-      BinaryColumnStatistics {
-
-    private long sum = 0;
-
-    BinaryStatisticsImpl() {
-    }
-
-    BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
-      if (binStats.hasSum()) {
-        sum = binStats.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      sum = 0;
-    }
-
-    @Override
-    void updateBinary(BytesWritable value) {
-      sum += value.getLength();
-    }
-
-    @Override
-    void updateBinary(byte[] bytes, int offset, int length, int repetitions) {
-      sum += length * repetitions;
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BinaryColumnStatistics) {
-        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
-        sum += bin.sum;
-      } else {
-        if (isStatsExists() && sum != 0) {
-          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
-      bin.setSum(sum);
-      result.setBinaryStatistics(bin);
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
-      implements DecimalColumnStatistics {
-    private HiveDecimal minimum = null;
-    private HiveDecimal maximum = null;
-    private HiveDecimal sum = HiveDecimal.ZERO;
-
-    DecimalStatisticsImpl() {
-    }
-
-    DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
-      if (dec.hasMaximum()) {
-        maximum = HiveDecimal.create(dec.getMaximum());
-      }
-      if (dec.hasMinimum()) {
-        minimum = HiveDecimal.create(dec.getMinimum());
-      }
-      if (dec.hasSum()) {
-        sum = HiveDecimal.create(dec.getSum());
-      } else {
-        sum = null;
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = HiveDecimal.ZERO;
-    }
-
-    @Override
-    void updateDecimal(HiveDecimal value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum.compareTo(value) > 0) {
-        minimum = value;
-      } else if (maximum.compareTo(value) < 0) {
-        maximum = value;
-      }
-      if (sum != null) {
-        sum = sum.add(value);
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DecimalStatisticsImpl) {
-        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dec.minimum;
-          maximum = dec.maximum;
-          sum = dec.sum;
-        } else if (dec.minimum != null) {
-          if (minimum.compareTo(dec.minimum) > 0) {
-            minimum = dec.minimum;
-          }
-          if (maximum.compareTo(dec.maximum) < 0) {
-            maximum = dec.maximum;
-          }
-          if (sum == null || dec.sum == null) {
-            sum = null;
-          } else {
-            sum = sum.add(dec.sum);
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DecimalStatistics.Builder dec =
-          OrcProto.DecimalStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dec.setMinimum(minimum.toString());
-        dec.setMaximum(maximum.toString());
-      }
-      if (sum != null) {
-        dec.setSum(sum.toString());
-      }
-      result.setDecimalStatistics(dec);
-      return result;
-    }
-
-    @Override
-    public HiveDecimal getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public HiveDecimal getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public HiveDecimal getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-        if (sum != null) {
-          buf.append(" sum: ");
-          buf.append(sum);
-        }
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DateStatisticsImpl extends ColumnStatisticsImpl
-      implements DateColumnStatistics {
-    private Integer minimum = null;
-    private Integer maximum = null;
-
-    DateStatisticsImpl() {
-    }
-
-    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DateStatistics dateStats = stats.getDateStatistics();
-      // min,max values serialized/deserialized as int (days since epoch)
-      if (dateStats.hasMaximum()) {
-        maximum = dateStats.getMaximum();
-      }
-      if (dateStats.hasMinimum()) {
-        minimum = dateStats.getMinimum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    void updateDate(DateWritable value) {
-      if (minimum == null) {
-        minimum = value.getDays();
-        maximum = value.getDays();
-      } else if (minimum > value.getDays()) {
-        minimum = value.getDays();
-      } else if (maximum < value.getDays()) {
-        maximum = value.getDays();
-      }
-    }
-
-    @Override
-    void updateDate(int value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum > value) {
-        minimum = value;
-      } else if (maximum < value) {
-        maximum = value;
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DateStatisticsImpl) {
-        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dateStats.minimum;
-          maximum = dateStats.maximum;
-        } else if (dateStats.minimum != null) {
-          if (minimum > dateStats.minimum) {
-            minimum = dateStats.minimum;
-          }
-          if (maximum < dateStats.maximum) {
-            maximum = dateStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of date column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DateStatistics.Builder dateStats =
-          OrcProto.DateStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dateStats.setMinimum(minimum);
-        dateStats.setMaximum(maximum);
-      }
-      result.setDateStatistics(dateStats);
-      return result;
-    }
-
-    private transient final DateWritable minDate = new DateWritable();
-    private transient final DateWritable maxDate = new DateWritable();
-
-    @Override
-    public Date getMinimum() {
-      if (minimum == null) {
-        return null;
-      }
-      minDate.set(minimum);
-      return minDate.get();
-    }
-
-    @Override
-    public Date getMaximum() {
-      if (maximum == null) {
-        return null;
-      }
-      maxDate.set(maximum);
-      return maxDate.get();
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
-      implements TimestampColumnStatistics {
-    private Long minimum = null;
-    private Long maximum = null;
-
-    TimestampStatisticsImpl() {
-    }
-
-    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
-      // min,max values serialized/deserialized as int (milliseconds since epoch)
-      if (timestampStats.hasMaximum()) {
-        maximum = timestampStats.getMaximum();
-      }
-      if (timestampStats.hasMinimum()) {
-        minimum = timestampStats.getMinimum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    void updateTimestamp(Timestamp value) {
-      if (minimum == null) {
-        minimum = value.getTime();
-        maximum = value.getTime();
-      } else if (minimum > value.getTime()) {
-        minimum = value.getTime();
-      } else if (maximum < value.getTime()) {
-        maximum = value.getTime();
-      }
-    }
-
-    @Override
-    void updateTimestamp(long value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum > value) {
-        minimum = value;
-      } else if (maximum < value) {
-        maximum = value;
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof TimestampStatisticsImpl) {
-        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = timestampStats.minimum;
-          maximum = timestampStats.maximum;
-        } else if (timestampStats.minimum != null) {
-          if (minimum > timestampStats.minimum) {
-            minimum = timestampStats.minimum;
-          }
-          if (maximum < timestampStats.maximum) {
-            maximum = timestampStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
-          .newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        timestampStats.setMinimum(minimum);
-        timestampStats.setMaximum(maximum);
-      }
-      result.setTimestampStatistics(timestampStats);
-      return result;
-    }
-
-    @Override
-    public Timestamp getMinimum() {
-      return minimum == null ? null : new Timestamp(minimum);
-    }
-
-    @Override
-    public Timestamp getMaximum() {
-      return maximum == null ? null : new Timestamp(maximum);
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private long count = 0;
-  private boolean hasNull = false;
-
-  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
-    if (stats.hasNumberOfValues()) {
-      count = stats.getNumberOfValues();
-    }
-
-    if (stats.hasHasNull()) {
-      hasNull = stats.getHasNull();
-    } else {
-      hasNull = true;
-    }
-  }
-
-  ColumnStatisticsImpl() {
-  }
-
-  void increment() {
-    count += 1;
-  }
-
-  void increment(int count) {
-    this.count += count;
-  }
-
-  void setNull() {
-    hasNull = true;
-  }
-
-  void updateBoolean(boolean value, int repetitions) {
-    throw new UnsupportedOperationException("Can't update boolean");
-  }
-
-  void updateInteger(long value, int repetitions) {
-    throw new UnsupportedOperationException("Can't update integer");
-  }
-
-  void updateDouble(double value) {
-    throw new UnsupportedOperationException("Can't update double");
-  }
-
-  void updateString(Text value) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  void updateString(byte[] bytes, int offset, int length, int repetitions) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  void updateBinary(BytesWritable value) {
-    throw new UnsupportedOperationException("Can't update binary");
-  }
-
-  void updateBinary(byte[] bytes, int offset, int length, int repetitions) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  void updateDecimal(HiveDecimal value) {
-    throw new UnsupportedOperationException("Can't update decimal");
-  }
-
-  void updateDate(DateWritable value) {
-    throw new UnsupportedOperationException("Can't update date");
-  }
-
-  void updateDate(int value) {
-    throw new UnsupportedOperationException("Can't update date");
-  }
-
-  void updateTimestamp(Timestamp value) {
-    throw new UnsupportedOperationException("Can't update timestamp");
-  }
-
-  void updateTimestamp(long value) {
-    throw new UnsupportedOperationException("Can't update timestamp");
-  }
-
-  boolean isStatsExists() {
-    return (count > 0 || hasNull == true);
-  }
-
-  void merge(ColumnStatisticsImpl stats) {
-    count += stats.count;
-    hasNull |= stats.hasNull;
-  }
-
-  void reset() {
-    count = 0;
-    hasNull = false;
-  }
-
-  @Override
-  public long getNumberOfValues() {
-    return count;
-  }
-
-  @Override
-  public boolean hasNull() {
-    return hasNull;
-  }
-
-  @Override
-  public String toString() {
-    return "count: " + count + " hasNull: " + hasNull;
-  }
-
-  OrcProto.ColumnStatistics.Builder serialize() {
-    OrcProto.ColumnStatistics.Builder builder =
-      OrcProto.ColumnStatistics.newBuilder();
-    builder.setNumberOfValues(count);
-    builder.setHasNull(hasNull);
-    return builder;
-  }
-
-  static ColumnStatisticsImpl create(TypeDescription schema) {
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        return new BooleanStatisticsImpl();
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-        return new IntegerStatisticsImpl();
-      case FLOAT:
-      case DOUBLE:
-        return new DoubleStatisticsImpl();
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return new StringStatisticsImpl();
-      case DECIMAL:
-        return new DecimalStatisticsImpl();
-      case DATE:
-        return new DateStatisticsImpl();
-      case TIMESTAMP:
-        return new TimestampStatisticsImpl();
-      case BINARY:
-        return new BinaryStatisticsImpl();
-      default:
-        return new ColumnStatisticsImpl();
-    }
-  }
-
-  static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
-    if (stats.hasBucketStatistics()) {
-      return new BooleanStatisticsImpl(stats);
-    } else if (stats.hasIntStatistics()) {
-      return new IntegerStatisticsImpl(stats);
-    } else if (stats.hasDoubleStatistics()) {
-      return new DoubleStatisticsImpl(stats);
-    } else if (stats.hasStringStatistics()) {
-      return new StringStatisticsImpl(stats);
-    } else if (stats.hasDecimalStatistics()) {
-      return new DecimalStatisticsImpl(stats);
-    } else if (stats.hasDateStatistics()) {
-      return new DateStatisticsImpl(stats);
-    } else if (stats.hasTimestampStatistics()) {
-      return new TimestampStatisticsImpl(stats);
-    } else if(stats.hasBinaryStatistics()) {
-      return new BinaryStatisticsImpl(stats);
-    } else {
-      return new ColumnStatisticsImpl(stats);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
deleted file mode 100644
index ed9d7ac..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-import javax.annotation.Nullable;
-
-public interface CompressionCodec {
-
-  public enum Modifier {
-    /* speed/compression tradeoffs */
-    FASTEST,
-    FAST,
-    DEFAULT,
-    /* data sensitivity modifiers */
-    TEXT,
-    BINARY
-  };
-
-  /**
-   * Compress the in buffer to the out buffer.
-   * @param in the bytes to compress
-   * @param out the uncompressed bytes
-   * @param overflow put any additional bytes here
-   * @return true if the output is smaller than input
-   * @throws IOException
-   */
-  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
-                  ) throws IOException;
-
-  /**
-   * Decompress the in buffer to the out buffer.
-   * @param in the bytes to decompress
-   * @param out the decompressed bytes
-   * @throws IOException
-   */
-  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
-
-  /**
-   * Produce a modified compression codec if the underlying algorithm allows
-   * modification.
-   *
-   * This does not modify the current object, but returns a new object if
-   * modifications are possible. Returns the same object if no modifications
-   * are possible.
-   * @param modifiers compression modifiers
-   * @return codec for use after optional modification
-   */
-  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
index 07c6116..22627df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
@@ -20,8 +20,22 @@ package org.apache.hadoop.hive.ql.io.orc;
 
 /**
  * An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
+ * can be applied to ORC files. This is a shim to help users while we
+ * migrate to the org.apache.orc package.
  */
 public enum CompressionKind {
-  NONE, ZLIB, SNAPPY, LZO
+  NONE(org.apache.orc.CompressionKind.NONE),
+  ZLIB(org.apache.orc.CompressionKind.ZLIB),
+  SNAPPY(org.apache.orc.CompressionKind.SNAPPY),
+  LZO(org.apache.orc.CompressionKind.LZO);
+
+  CompressionKind(org.apache.orc.CompressionKind underlying) {
+    this.underlying = underlying;
+  }
+
+  public org.apache.orc.CompressionKind getUnderlying() {
+    return underlying;
+  }
+
+  private final org.apache.orc.CompressionKind underlying;
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
deleted file mode 100644
index e0d9943..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-
-/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
-public interface DataReader {
-
-  /** Opens the DataReader, making it ready to use. */
-  void open() throws IOException;
-
-  /** Closes the DataReader. */
-  void close() throws IOException;
-
-  /** Reads the data.
-   *
-   * Note that for the cases such as zero-copy read, caller must release the disk ranges
-   * produced after being done with them. Call isTrackingDiskRanges to find out if this is needed.
-   * @param range List if disk ranges to read. Ranges with data will be ignored.
-   * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
-   * @param doForceDirect Whether the data should be read into direct buffers.
-   * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
-   */
-  DiskRangeList readFileData(
-      DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
-
-
-  /**
-   * Whether the user should release buffers created by readFileData. See readFileData javadoc.
-   */
-  boolean isTrackingDiskRanges();
-
-  /**
-   * Releases buffers created by readFileData. See readFileData javadoc.
-   * @param toRelease The buffer to release.
-   */
-  void releaseBuffer(ByteBuffer toRelease);
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
deleted file mode 100644
index ae3fe31..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Date getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Date getMaximum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
deleted file mode 100644
index ec6aa43..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the minimum value for the column.
-   * @return the minimum value
-   */
-  HiveDecimal getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return the maximum value
-   */
-  HiveDecimal getMaximum();
-
-  /**
-   * Get the sum of the values of the column.
-   * @return the sum
-   */
-  HiveDecimal getSum();
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
deleted file mode 100644
index 41a77b0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public interface DirectDecompressionCodec extends CompressionCodec {
-  public boolean isAvailable();
-  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
deleted file mode 100644
index 6af7535..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  double getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  double getMaximum();
-
-  /**
-   * Get the sum of the values in the column.
-   * @return the sum
-   */
-  double getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
deleted file mode 100644
index 063c53c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that is a growable array of bytes. Growth is managed in terms of
- * chunks that are allocated when needed.
- */
-public final class DynamicByteArray {
-  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
-  static final int DEFAULT_NUM_CHUNKS = 128;
-
-  private final int chunkSize;        // our allocation sizes
-  private byte[][] data;              // the real data
-  private int length;                 // max set element index +1
-  private int initializedChunks = 0;  // the number of chunks created
-
-  public DynamicByteArray() {
-    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicByteArray(int numChunks, int chunkSize) {
-    if (chunkSize == 0) {
-      throw new IllegalArgumentException("bad chunksize");
-    }
-    this.chunkSize = chunkSize;
-    data = new byte[numChunks][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        byte[][] newChunk = new byte[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for(int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new byte[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public byte get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, byte value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public int add(byte value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    int result = length;
-    length += 1;
-    return result;
-  }
-
-  /**
-   * Copy a slice of a byte array into our buffer.
-   * @param value the array to copy from
-   * @param valueOffset the first location to copy from value
-   * @param valueLength the number of bytes to copy from value
-   * @return the offset of the start of the value
-   */
-  public int add(byte[] value, int valueOffset, int valueLength) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow((length + valueLength) / chunkSize);
-    int remaining = valueLength;
-    while (remaining > 0) {
-      int size = Math.min(remaining, chunkSize - j);
-      System.arraycopy(value, valueOffset, data[i], j, size);
-      remaining -= size;
-      valueOffset += size;
-      i += 1;
-      j = 0;
-    }
-    int result = length;
-    length += valueLength;
-    return result;
-  }
-
-  /**
-   * Read the entire stream into this array.
-   * @param in the stream to read from
-   * @throws IOException
-   */
-  public void readAll(InputStream in) throws IOException {
-    int currentChunk = length / chunkSize;
-    int currentOffset = length % chunkSize;
-    grow(currentChunk);
-    int currentLength = in.read(data[currentChunk], currentOffset,
-      chunkSize - currentOffset);
-    while (currentLength > 0) {
-      length += currentLength;
-      currentOffset = length % chunkSize;
-      if (currentOffset == 0) {
-        currentChunk = length / chunkSize;
-        grow(currentChunk);
-      }
-      currentLength = in.read(data[currentChunk], currentOffset,
-        chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Byte compare a set of bytes against the bytes in this dynamic array.
-   * @param other source of the other bytes
-   * @param otherOffset start offset in the other array
-   * @param otherLength number of bytes in the other array
-   * @param ourOffset the offset in our array
-   * @param ourLength the number of bytes in our array
-   * @return negative for less, 0 for equal, positive for greater
-   */
-  public int compare(byte[] other, int otherOffset, int otherLength,
-                     int ourOffset, int ourLength) {
-    int currentChunk = ourOffset / chunkSize;
-    int currentOffset = ourOffset % chunkSize;
-    int maxLength = Math.min(otherLength, ourLength);
-    while (maxLength > 0 &&
-      other[otherOffset] == data[currentChunk][currentOffset]) {
-      otherOffset += 1;
-      currentOffset += 1;
-      if (currentOffset == chunkSize) {
-        currentChunk += 1;
-        currentOffset = 0;
-      }
-      maxLength -= 1;
-    }
-    if (maxLength == 0) {
-      return otherLength - ourLength;
-    }
-    int otherByte = 0xff & other[otherOffset];
-    int ourByte = 0xff & data[currentChunk][currentOffset];
-    return otherByte > ourByte ? 1 : -1;
-  }
-
-  /**
-   * Get the size of the array.
-   * @return the number of bytes in the array
-   */
-  public int size() {
-    return length;
-  }
-
-  /**
-   * Clear the array to its original pristine state.
-   */
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  /**
-   * Set a text value from the bytes in this dynamic array.
-   * @param result the value to set
-   * @param offset the start of the bytes to copy
-   * @param length the number of bytes to copy
-   */
-  public void setText(Text result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.append(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Write out a range of this dynamic array to an output stream.
-   * @param out the stream to write to
-   * @param offset the first offset to write
-   * @param length the number of bytes to write
-   * @throws IOException
-   */
-  public void write(OutputStream out, int offset,
-                    int length) throws IOException {
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    while (length > 0) {
-      int currentLength = Math.min(length, chunkSize - currentOffset);
-      out.write(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-    }
-  }
-
-  @Override
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 3);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(Integer.toHexString(get(i)));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public void setByteBuffer(ByteBuffer result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.put(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Gets all the bytes of the array.
-   *
-   * @return Bytes of the array
-   */
-  public byte[] get() {
-    byte[] result = null;
-    if (length > 0) {
-      int currentChunk = 0;
-      int currentOffset = 0;
-      int currentLength = Math.min(length, chunkSize);
-      int destOffset = 0;
-      result = new byte[length];
-      int totalLength = length;
-      while (totalLength > 0) {
-        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
-        destOffset += currentLength;
-        totalLength -= currentLength;
-        currentChunk += 1;
-        currentOffset = 0;
-        currentLength = Math.min(totalLength, chunkSize - currentOffset);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Get the size of the buffers.
-   */
-  public long getSizeInBytes() {
-    return initializedChunks * chunkSize;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
deleted file mode 100644
index 2e884c0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Dynamic int array that uses primitive types and chunks to avoid copying
- * large number of integers when it resizes.
- *
- * The motivation for this class is memory optimization, i.e. space efficient
- * storage of potentially huge arrays without good a-priori size guesses.
- *
- * The API of this class is between a primitive array and a AbstractList. It's
- * not a Collection implementation because it handles primitive types, but the
- * API could be extended to support iterators and the like.
- *
- * NOTE: Like standard Collection implementations/arrays, this class is not
- * synchronized.
- */
-final class DynamicIntArray {
-  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
-  static final int INIT_CHUNKS = 128;
-
-  private final int chunkSize;       // our allocation size
-  private int[][] data;              // the real data
-  private int length;                // max set element index +1
-  private int initializedChunks = 0; // the number of created chunks
-
-  public DynamicIntArray() {
-    this(DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicIntArray(int chunkSize) {
-    this.chunkSize = chunkSize;
-
-    data = new int[INIT_CHUNKS][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        int[][] newChunk = new int[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for (int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new int[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public int get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public void increment(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] += value;
-  }
-
-  public void add(int value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    length += 1;
-  }
-
-  public int size() {
-    return length;
-  }
-
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 4);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(get(i));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public int getSizeInBytes() {
-    return 4 * initializedChunks * chunkSize;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index f17b154..a242a37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -41,8 +41,6 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -50,6 +48,13 @@ import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONWriter;
 
@@ -381,7 +386,7 @@ public final class FileDump {
         for (int colIdx : rowIndexCols) {
           sargColumns[colIdx] = true;
         }
-        RecordReaderImpl.Index indices = rows
+        OrcIndex indices = rows
             .readRowIndex(stripeIx, null, null, null, sargColumns);
         for (int col : rowIndexCols) {
           StringBuilder buf = new StringBuilder();
@@ -649,9 +654,10 @@ public final class FileDump {
     return sb.toString();
   }
 
-  private static String getFormattedRowIndices(int col, RowIndex[] rowGroupIndex) {
+  private static String getFormattedRowIndices(int col,
+                                               OrcProto.RowIndex[] rowGroupIndex) {
     StringBuilder buf = new StringBuilder();
-    RowIndex index;
+    OrcProto.RowIndex index;
     buf.append("    Row group indices for column ").append(col).append(":");
     if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
         ((index = rowGroupIndex[col]) == null)) {
@@ -661,7 +667,7 @@ public final class FileDump {
 
     for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
       buf.append("\n      Entry ").append(entryIx).append(": ");
-      RowIndexEntry entry = index.getEntry(entryIx);
+      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
       if (entry == null) {
         buf.append("unknown\n");
         continue;
@@ -686,7 +692,7 @@ public final class FileDump {
 
   public static long getTotalPaddingSize(Reader reader) throws IOException {
     long paddedBytes = 0;
-    List<org.apache.hadoop.hive.ql.io.orc.StripeInformation> stripes = reader.getStripes();
+    List<StripeInformation> stripes = reader.getStripes();
     for (int i = 1; i < stripes.size(); i++) {
       long prevStripeOffset = stripes.get(i - 1).getOffset();
       long prevStripeLen = stripes.get(i - 1).getLength();

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
deleted file mode 100644
index 95c674e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
-  ByteBuffer footerMetaAndPsBuffer;
-  final String compressionType;
-  final int bufferSize;
-  final int metadataSize;
-  final ByteBuffer footerBuffer;
-  final List<Integer> versionList;
-  final OrcFile.WriterVersion writerVersion;
-
-
-  /** Ctor used when reading splits - no version list or full footer buffer. */
-  FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-      ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
-    this(compressionType, bufferSize, metadataSize, footerBuffer, null,
-        writerVersion, null);
-  }
-
-  /** Ctor used when creating file info during init and when getting a new one. */
-  public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-      ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
-      ByteBuffer fullFooterBuffer) {
-    this.compressionType = compressionType;
-    this.bufferSize = bufferSize;
-    this.metadataSize = metadataSize;
-    this.footerBuffer = footerBuffer;
-    this.versionList = versionList;
-    this.writerVersion = writerVersion;
-    this.footerMetaAndPsBuffer = fullFooterBuffer;
-  }
-
-  public OrcFile.WriterVersion getWriterVersion() {
-    return writerVersion;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
deleted file mode 100644
index 26b27a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-
-/**
- * Cached file metadata. Right now, it caches everything; we don't have to store all the
- * protobuf structs actually, we could just store what we need, but that would require that
- * ORC stop depending on them too. Luckily, they shouldn't be very big.
- */
-public interface FileMetadata {
-  boolean isOriginalFormat();
-
-  List<StripeInformation> getStripes();
-
-  CompressionKind getCompressionKind();
-
-  int getCompressionBufferSize();
-
-  int getRowIndexStride();
-
-  int getColumnCount();
-
-  int getFlattenedColumnCount();
-
-  long getFileId();
-
-  List<Integer> getVersionList();
-
-  int getMetadataSize();
-
-  int getWriterVersionNum();
-
-  List<Type> getTypes();
-
-  List<OrcProto.StripeStatistics> getStripeStats();
-
-  long getContentLength();
-
-  long getNumberOfRows();
-
-  List<OrcProto.ColumnStatistics> getFileStats();
-}
\ No newline at end of file


Mime
View raw message