From: omalley@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Date: Fri, 11 Dec 2015 23:32:34 -0000
Subject: [07/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley reviewed by prasanthj)

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
index 99a3e8d..2c9deac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
@@ -20,16 +20,16 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.IOException;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.orc.CompressionKind;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper;
 import org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
-import org.apache.hadoop.hive.ql.io.orc.Writer;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
index 56aec9f..878efbe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/filters/BloomFilterIO.java
@@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.io.filters;
 
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.orc.OrcProto;
 
 import org.apache.hive.common.util.BloomFilter;
 import com.google.common.primitives.Longs;

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
deleted file mode 100644
index 23030a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
deleted file mode 100644
index ec1f0a9..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-
-public class BitFieldReader {
-  private final RunLengthByteReader input;
-  /** The number of bits in one item. Non-test code always uses 1. */
-  private final int bitSize;
-  private int current;
-  private int bitsLeft;
-  private final int mask;
-
-  public BitFieldReader(InStream input,
-                        int bitSize) throws IOException {
-    this.input = new RunLengthByteReader(input);
-    this.bitSize = bitSize;
-    mask = (1 << bitSize) - 1;
-  }
-
-  public void setInStream(InStream inStream) {
-    this.input.setInStream(inStream);
-  }
-
-  private void readByte() throws IOException {
-    if (input.hasNext()) {
-      current = 0xff & input.next();
-      bitsLeft = 8;
-    } else {
-      throw new EOFException("Read past end of bit field from " + this);
-    }
-  }
-
-  public int next() throws IOException {
-    int result = 0;
-    int bitsLeftToRead = bitSize;
-    while (bitsLeftToRead > bitsLeft) {
-      result <<= bitsLeft;
-      result |= current & ((1 << bitsLeft) - 1);
-      bitsLeftToRead -= bitsLeft;
-      readByte();
-    }
-    if (bitsLeftToRead > 0) {
-      result <<= bitsLeftToRead;
-      bitsLeft -= bitsLeftToRead;
-      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-    }
-    return result & mask;
-  }
-
-  /**
-   * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
-   * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
-   * However, it means we'd need to keep track of how many bytes we read, and next/nextVector
-   * won't work anymore once this is called. This is trivial to fix, but these calls are never
-   * interspersed.
-   */
-  private boolean lastRunValue;
-  private int lastRunLength = -1;
-  private void readNextRun(int maxRunLength) throws IOException {
-    assert bitSize == 1;
-    if (lastRunLength > 0) return; // last run is not exhausted yet
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    // First take care of the partial bits.
-    boolean hasVal = false;
-    int runLength = 0;
-    if (bitsLeft != 8) {
-      int partialBitsMask = (1 << bitsLeft) - 1;
-      int partialBits = current & partialBitsMask;
-      if (partialBits == partialBitsMask || partialBits == 0) {
-        lastRunValue = (partialBits == partialBitsMask);
-        if (maxRunLength <= bitsLeft) {
-          lastRunLength = maxRunLength;
-          return;
-        }
-        maxRunLength -= bitsLeft;
-        hasVal = true;
-        runLength = bitsLeft;
-        bitsLeft = 0;
-      } else {
-        // There's no run in partial bits. Return whatever we have.
-        int prefixBitsCount = 32 - bitsLeft;
-        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
-        lastRunValue = (runLength > 0);
-        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
-            (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
-        return;
-      }
-      assert bitsLeft == 0;
-      readByte();
-    }
-    if (!hasVal) {
-      lastRunValue = ((current >> 7) == 1);
-      hasVal = true;
-    }
-    // Read full bytes until the run ends.
-    assert bitsLeft == 8;
-    while (maxRunLength >= 8
-        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
-      runLength += 8;
-      maxRunLength -= 8;
-      readByte();
-    }
-    if (maxRunLength > 0) {
-      int extraBits = Integer.numberOfLeadingZeros(
-          lastRunValue ? (~(current | ~255)) : current) - 24;
-      bitsLeft -= extraBits;
-      runLength += extraBits;
-    }
-    lastRunLength = runLength;
-  }
-
-  void nextVector(LongColumnVector previous, long previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (!previous.isNull[i]) {
-        previous.vector[i] = next();
-      } else {
-        // The default value of null for int types in vectorized
-        // processing is 1, so set that if the value is null
-        previous.vector[i] = 1;
-      }
-
-      // The default value for nulls in Vectorization for int types is 1,
-      // and given that a non-null value can also be 1, we need to check isNull as well
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed > 8) {
-      throw new IllegalArgumentException("Seek past end of byte at " +
-          consumed + " in " + input);
-    } else if (consumed != 0) {
-      readByte();
-      bitsLeft = 8 - consumed;
-    } else {
-      bitsLeft = 0;
-    }
-  }
-
-  void skip(long items) throws IOException {
-    long totalBits = bitSize * items;
-    if (bitsLeft >= totalBits) {
-      bitsLeft -= totalBits;
-    } else {
-      totalBits -= bitsLeft;
-      input.skip(totalBits / 8);
-      current = input.next();
-      bitsLeft = (int) (8 - (totalBits % 8));
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "bit reader current: " + current + " bits left: " + bitsLeft +
-        " bit size: " + bitSize + " from " + input;
-  }
-
-  boolean hasFullByte() {
-    return bitsLeft == 8 || bitsLeft == 0;
-  }
-
-  int peekOneBit() throws IOException {
-    assert bitSize == 1;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return (current >>> (bitsLeft - 1)) & 1;
-  }
-
-  int peekFullByte() throws IOException {
-    assert bitSize == 1;
-    assert bitsLeft == 8 || bitsLeft == 0;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return current;
-  }
-
-  void skipInCurrentByte(int bits) throws IOException {
-    assert bitsLeft >= bits;
-    bitsLeft -= bits;
-  }
-}
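
The readNextRun() logic above leans on Integer.numberOfLeadingZeros to measure how far a run of identical bits extends. A self-contained sketch of that counting trick, separated from the stream handling (the class and method names here are ours for illustration, not part of the patch; it assumes a window of bitsLeft low-order bits inside an unsigned byte):

class BitRunSketch {
  // Length of the run of identical bits at the top of the low 'bitsLeft'
  // bits of 'current'. Zeros are counted directly; ones are counted by
  // complementing inside the window and counting zeros again.
  static int leadingRun(int current, int bitsLeft) {
    int partialBitsMask = (1 << bitsLeft) - 1;
    int partialBits = current & partialBitsMask;
    int prefixBitsCount = 32 - bitsLeft;    // bits above the window are zero
    int zerosRun = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
    if (zerosRun > 0) {
      return Math.min(zerosRun, bitsLeft);  // all-zero window => whole window
    }
    return Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount;
  }

  public static void main(String[] args) {
    System.out.println(leadingRun(0b110100, 6)); // 2: two 1s lead the window
    System.out.println(leadingRun(0b000101, 6)); // 3: three 0s lead the window
  }
}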

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
deleted file mode 100644
index 0608da6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-
-class BitFieldWriter {
-  private RunLengthByteWriter output;
-  private final int bitSize;
-  private byte current = 0;
-  private int bitsLeft = 8;
-
-  BitFieldWriter(PositionedOutputStream output,
-                 int bitSize) throws IOException {
-    this.output = new RunLengthByteWriter(output);
-    this.bitSize = bitSize;
-  }
-
-  private void writeByte() throws IOException {
-    output.write(current);
-    current = 0;
-    bitsLeft = 8;
-  }
-
-  void flush() throws IOException {
-    if (bitsLeft != 8) {
-      writeByte();
-    }
-    output.flush();
-  }
-
-  void write(int value) throws IOException {
-    int bitsToWrite = bitSize;
-    while (bitsToWrite > bitsLeft) {
-      // add the bits to the bottom of the current word
-      current |= value >>> (bitsToWrite - bitsLeft);
-      // subtract out the bits we just added
-      bitsToWrite -= bitsLeft;
-      // zero out the bits above bitsToWrite
-      value &= (1 << bitsToWrite) - 1;
-      writeByte();
-    }
-    bitsLeft -= bitsToWrite;
-    current |= value << bitsLeft;
-    if (bitsLeft == 0) {
-      writeByte();
-    }
-  }
-
-  void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(8 - bitsLeft);
-  }
-}
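
For contrast with the reader, here is a minimal standalone sketch of the MSB-first packing that BitFieldWriter.write() performs. A plain ByteArrayOutputStream stands in for RunLengthByteWriter, which this sketch assumes behaves as a simple byte sink; the class name is ours, not the patch's:

import java.io.ByteArrayOutputStream;

class BitPackSketch {
  private final ByteArrayOutputStream out = new ByteArrayOutputStream();
  private final int bitSize;   // bits per value, e.g. 1 for booleans
  private int current = 0;     // partially filled byte
  private int bitsLeft = 8;    // free bits remaining in 'current'

  BitPackSketch(int bitSize) { this.bitSize = bitSize; }

  void write(int value) {
    int bitsToWrite = bitSize;
    while (bitsToWrite > bitsLeft) {
      // take the high-order bits of 'value' into the bottom of 'current'
      current |= value >>> (bitsToWrite - bitsLeft);
      bitsToWrite -= bitsLeft;
      value &= (1 << bitsToWrite) - 1;  // keep only the unwritten low bits
      out.write(current);
      current = 0;
      bitsLeft = 8;
    }
    bitsLeft -= bitsToWrite;
    current |= value << bitsLeft;
    if (bitsLeft == 0) { out.write(current); current = 0; bitsLeft = 8; }
  }

  byte[] finish() {            // like flush(): emit the last partial byte
    if (bitsLeft != 8) { out.write(current); }
    return out.toByteArray();
  }

  public static void main(String[] args) {
    BitPackSketch w = new BitPackSketch(1);
    for (int bit : new int[]{1, 0, 1, 1, 0, 0, 1, 0, 1}) { w.write(bit); }
    byte[] packed = w.finish();  // 0b10110010 then 0b10000000
    System.out.printf("%02x %02x%n", packed[0] & 0xff, packed[1] & 0xff); // b2 80
  }
}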

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
deleted file mode 100644
index 6d03998..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
-  long getFalseCount();
-
-  long getTrueCount();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
deleted file mode 100644
index 7c973c2..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
-  /**
-   * Get the number of values in this column. It will differ from the number
-   * of rows because of NULL values and repeated values.
-   * @return the number of values
-   */
-  long getNumberOfValues();
-
-  /**
-   * Returns true if there are nulls in the scope of column statistics.
-   * @return true if null present else false
-   */
-  boolean hasNull();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
deleted file mode 100644
index bcca9de..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1082 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.sql.Date; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparator; - -class ColumnStatisticsImpl implements ColumnStatistics { - - private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl - implements BooleanColumnStatistics { - private long trueCount = 0; - - BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.BucketStatistics bkt = stats.getBucketStatistics(); - trueCount = bkt.getCount(0); - } - - BooleanStatisticsImpl() { - } - - @Override - void reset() { - super.reset(); - trueCount = 0; - } - - @Override - void updateBoolean(boolean value, int repetitions) { - if (value) { - trueCount += repetitions; - } - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof BooleanStatisticsImpl) { - BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other; - trueCount += bkt.trueCount; - } else { - if (isStatsExists() && trueCount != 0) { - throw new IllegalArgumentException("Incompatible merging of boolean column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.BucketStatistics.Builder bucket = - OrcProto.BucketStatistics.newBuilder(); - bucket.addCount(trueCount); - builder.setBucketStatistics(bucket); - return builder; - } - - @Override - public long getFalseCount() { - return getNumberOfValues() - trueCount; - } - - @Override - public long getTrueCount() { - return trueCount; - } - - @Override - public String toString() { - return super.toString() + " true: " + trueCount; - } - } - - private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl - implements IntegerColumnStatistics { - - private long minimum = Long.MAX_VALUE; - private long maximum = Long.MIN_VALUE; - private long sum = 0; - private boolean hasMinimum = false; - private boolean overflow = false; - - IntegerStatisticsImpl() { - } - - IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.IntegerStatistics intStat = stats.getIntStatistics(); - if (intStat.hasMinimum()) { - hasMinimum = true; - minimum = intStat.getMinimum(); - } - if (intStat.hasMaximum()) { - maximum = intStat.getMaximum(); - } - if (intStat.hasSum()) { - sum = intStat.getSum(); - } else { - overflow = true; - } - } - - @Override - void reset() { - super.reset(); - hasMinimum = false; - minimum = Long.MAX_VALUE; - maximum = Long.MIN_VALUE; - sum = 0; - overflow = false; - } - - @Override - void updateInteger(long value, int repetitions) { - if (!hasMinimum) { - hasMinimum = true; - minimum = value; - maximum = value; - } else if (value < minimum) { - minimum = value; - } else if (value > maximum) { - maximum = value; - } - if (!overflow) { - boolean wasPositive = sum >= 0; - sum += value * repetitions; - if ((value >= 0) == wasPositive) { - overflow = (sum >= 0) != wasPositive; - } - } - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof IntegerStatisticsImpl) { - IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = otherInt.hasMinimum; - minimum = otherInt.minimum; - maximum = otherInt.maximum; - } else if (otherInt.hasMinimum) { - if (otherInt.minimum 
< minimum) { - minimum = otherInt.minimum; - } - if (otherInt.maximum > maximum) { - maximum = otherInt.maximum; - } - } - - overflow |= otherInt.overflow; - if (!overflow) { - boolean wasPositive = sum >= 0; - sum += otherInt.sum; - if ((otherInt.sum >= 0) == wasPositive) { - overflow = (sum >= 0) != wasPositive; - } - } - } else { - if (isStatsExists() && hasMinimum) { - throw new IllegalArgumentException("Incompatible merging of integer column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.IntegerStatistics.Builder intb = - OrcProto.IntegerStatistics.newBuilder(); - if (hasMinimum) { - intb.setMinimum(minimum); - intb.setMaximum(maximum); - } - if (!overflow) { - intb.setSum(sum); - } - builder.setIntStatistics(intb); - return builder; - } - - @Override - public long getMinimum() { - return minimum; - } - - @Override - public long getMaximum() { - return maximum; - } - - @Override - public boolean isSumDefined() { - return !overflow; - } - - @Override - public long getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (hasMinimum) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - } - if (!overflow) { - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl - implements DoubleColumnStatistics { - private boolean hasMinimum = false; - private double minimum = Double.MAX_VALUE; - private double maximum = Double.MIN_VALUE; - private double sum = 0; - - DoubleStatisticsImpl() { - } - - DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics(); - if (dbl.hasMinimum()) { - hasMinimum = true; - minimum = dbl.getMinimum(); - } - if (dbl.hasMaximum()) { - maximum = dbl.getMaximum(); - } - if (dbl.hasSum()) { - sum = dbl.getSum(); - } - } - - @Override - void reset() { - super.reset(); - hasMinimum = false; - minimum = Double.MAX_VALUE; - maximum = Double.MIN_VALUE; - sum = 0; - } - - @Override - void updateDouble(double value) { - if (!hasMinimum) { - hasMinimum = true; - minimum = value; - maximum = value; - } else if (value < minimum) { - minimum = value; - } else if (value > maximum) { - maximum = value; - } - sum += value; - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof DoubleStatisticsImpl) { - DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = dbl.hasMinimum; - minimum = dbl.minimum; - maximum = dbl.maximum; - } else if (dbl.hasMinimum) { - if (dbl.minimum < minimum) { - minimum = dbl.minimum; - } - if (dbl.maximum > maximum) { - maximum = dbl.maximum; - } - } - sum += dbl.sum; - } else { - if (isStatsExists() && hasMinimum) { - throw new IllegalArgumentException("Incompatible merging of double column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.DoubleStatistics.Builder dbl = - OrcProto.DoubleStatistics.newBuilder(); - if (hasMinimum) { - dbl.setMinimum(minimum); - dbl.setMaximum(maximum); - } - dbl.setSum(sum); - builder.setDoubleStatistics(dbl); - return builder; - } - - @Override - public double getMinimum() { - return minimum; 
- } - - @Override - public double getMaximum() { - return maximum; - } - - @Override - public double getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (hasMinimum) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - } - buf.append(" sum: "); - buf.append(sum); - return buf.toString(); - } - } - - protected static final class StringStatisticsImpl extends ColumnStatisticsImpl - implements StringColumnStatistics { - private Text minimum = null; - private Text maximum = null; - private long sum = 0; - - StringStatisticsImpl() { - } - - StringStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.StringStatistics str = stats.getStringStatistics(); - if (str.hasMaximum()) { - maximum = new Text(str.getMaximum()); - } - if (str.hasMinimum()) { - minimum = new Text(str.getMinimum()); - } - if(str.hasSum()) { - sum = str.getSum(); - } - } - - @Override - void reset() { - super.reset(); - minimum = null; - maximum = null; - sum = 0; - } - - @Override - void updateString(Text value) { - if (minimum == null) { - maximum = minimum = new Text(value); - } else if (minimum.compareTo(value) > 0) { - minimum = new Text(value); - } else if (maximum.compareTo(value) < 0) { - maximum = new Text(value); - } - sum += value.getLength(); - } - - @Override - void updateString(byte[] bytes, int offset, int length, int repetitions) { - if (minimum == null) { - maximum = minimum = new Text(); - maximum.set(bytes, offset, length); - } else if (WritableComparator.compareBytes(minimum.getBytes(), 0, - minimum.getLength(), bytes, offset, length) > 0) { - minimum = new Text(); - minimum.set(bytes, offset, length); - } else if (WritableComparator.compareBytes(maximum.getBytes(), 0, - maximum.getLength(), bytes, offset, length) < 0) { - maximum = new Text(); - maximum.set(bytes, offset, length); - } - sum += length * repetitions; - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof StringStatisticsImpl) { - StringStatisticsImpl str = (StringStatisticsImpl) other; - if (minimum == null) { - if (str.minimum != null) { - maximum = new Text(str.getMaximum()); - minimum = new Text(str.getMinimum()); - } else { - /* both are empty */ - maximum = minimum = null; - } - } else if (str.minimum != null) { - if (minimum.compareTo(str.minimum) > 0) { - minimum = new Text(str.getMinimum()); - } - if (maximum.compareTo(str.maximum) < 0) { - maximum = new Text(str.getMaximum()); - } - } - sum += str.sum; - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of string column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.StringStatistics.Builder str = - OrcProto.StringStatistics.newBuilder(); - if (getNumberOfValues() != 0) { - str.setMinimum(getMinimum()); - str.setMaximum(getMaximum()); - str.setSum(sum); - } - result.setStringStatistics(str); - return result; - } - - @Override - public String getMinimum() { - return minimum == null ? null : minimum.toString(); - } - - @Override - public String getMaximum() { - return maximum == null ? 
null : maximum.toString(); - } - - @Override - public long getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements - BinaryColumnStatistics { - - private long sum = 0; - - BinaryStatisticsImpl() { - } - - BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics(); - if (binStats.hasSum()) { - sum = binStats.getSum(); - } - } - - @Override - void reset() { - super.reset(); - sum = 0; - } - - @Override - void updateBinary(BytesWritable value) { - sum += value.getLength(); - } - - @Override - void updateBinary(byte[] bytes, int offset, int length, int repetitions) { - sum += length * repetitions; - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof BinaryColumnStatistics) { - BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other; - sum += bin.sum; - } else { - if (isStatsExists() && sum != 0) { - throw new IllegalArgumentException("Incompatible merging of binary column statistics"); - } - } - super.merge(other); - } - - @Override - public long getSum() { - return sum; - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder(); - bin.setSum(sum); - result.setBinaryStatistics(bin); - return result; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl - implements DecimalColumnStatistics { - private HiveDecimal minimum = null; - private HiveDecimal maximum = null; - private HiveDecimal sum = HiveDecimal.ZERO; - - DecimalStatisticsImpl() { - } - - DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DecimalStatistics dec = stats.getDecimalStatistics(); - if (dec.hasMaximum()) { - maximum = HiveDecimal.create(dec.getMaximum()); - } - if (dec.hasMinimum()) { - minimum = HiveDecimal.create(dec.getMinimum()); - } - if (dec.hasSum()) { - sum = HiveDecimal.create(dec.getSum()); - } else { - sum = null; - } - } - - @Override - void reset() { - super.reset(); - minimum = null; - maximum = null; - sum = HiveDecimal.ZERO; - } - - @Override - void updateDecimal(HiveDecimal value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum.compareTo(value) > 0) { - minimum = value; - } else if (maximum.compareTo(value) < 0) { - maximum = value; - } - if (sum != null) { - sum = sum.add(value); - } - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof DecimalStatisticsImpl) { - DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other; - if (minimum == null) { - minimum = dec.minimum; - maximum = dec.maximum; - sum = dec.sum; - } else if (dec.minimum != null) { - if (minimum.compareTo(dec.minimum) > 0) { - minimum = dec.minimum; - } - if (maximum.compareTo(dec.maximum) < 0) { - maximum = dec.maximum; - } - if (sum == null || dec.sum == 
null) { - sum = null; - } else { - sum = sum.add(dec.sum); - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.DecimalStatistics.Builder dec = - OrcProto.DecimalStatistics.newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - dec.setMinimum(minimum.toString()); - dec.setMaximum(maximum.toString()); - } - if (sum != null) { - dec.setSum(sum.toString()); - } - result.setDecimalStatistics(dec); - return result; - } - - @Override - public HiveDecimal getMinimum() { - return minimum; - } - - @Override - public HiveDecimal getMaximum() { - return maximum; - } - - @Override - public HiveDecimal getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - if (sum != null) { - buf.append(" sum: "); - buf.append(sum); - } - } - return buf.toString(); - } - } - - private static final class DateStatisticsImpl extends ColumnStatisticsImpl - implements DateColumnStatistics { - private Integer minimum = null; - private Integer maximum = null; - - DateStatisticsImpl() { - } - - DateStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DateStatistics dateStats = stats.getDateStatistics(); - // min,max values serialized/deserialized as int (days since epoch) - if (dateStats.hasMaximum()) { - maximum = dateStats.getMaximum(); - } - if (dateStats.hasMinimum()) { - minimum = dateStats.getMinimum(); - } - } - - @Override - void reset() { - super.reset(); - minimum = null; - maximum = null; - } - - @Override - void updateDate(DateWritable value) { - if (minimum == null) { - minimum = value.getDays(); - maximum = value.getDays(); - } else if (minimum > value.getDays()) { - minimum = value.getDays(); - } else if (maximum < value.getDays()) { - maximum = value.getDays(); - } - } - - @Override - void updateDate(int value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum > value) { - minimum = value; - } else if (maximum < value) { - maximum = value; - } - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof DateStatisticsImpl) { - DateStatisticsImpl dateStats = (DateStatisticsImpl) other; - if (minimum == null) { - minimum = dateStats.minimum; - maximum = dateStats.maximum; - } else if (dateStats.minimum != null) { - if (minimum > dateStats.minimum) { - minimum = dateStats.minimum; - } - if (maximum < dateStats.maximum) { - maximum = dateStats.maximum; - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of date column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.DateStatistics.Builder dateStats = - OrcProto.DateStatistics.newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - dateStats.setMinimum(minimum); - dateStats.setMaximum(maximum); - } - result.setDateStatistics(dateStats); - return result; - } - - private transient final DateWritable minDate = new DateWritable(); - private transient final DateWritable maxDate = new 
DateWritable(); - - @Override - public Date getMinimum() { - if (minimum == null) { - return null; - } - minDate.set(minimum); - return minDate.get(); - } - - @Override - public Date getMaximum() { - if (maximum == null) { - return null; - } - maxDate.set(maximum); - return maxDate.get(); - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - } - return buf.toString(); - } - } - - private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl - implements TimestampColumnStatistics { - private Long minimum = null; - private Long maximum = null; - - TimestampStatisticsImpl() { - } - - TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics(); - // min,max values serialized/deserialized as int (milliseconds since epoch) - if (timestampStats.hasMaximum()) { - maximum = timestampStats.getMaximum(); - } - if (timestampStats.hasMinimum()) { - minimum = timestampStats.getMinimum(); - } - } - - @Override - void reset() { - super.reset(); - minimum = null; - maximum = null; - } - - @Override - void updateTimestamp(Timestamp value) { - if (minimum == null) { - minimum = value.getTime(); - maximum = value.getTime(); - } else if (minimum > value.getTime()) { - minimum = value.getTime(); - } else if (maximum < value.getTime()) { - maximum = value.getTime(); - } - } - - @Override - void updateTimestamp(long value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum > value) { - minimum = value; - } else if (maximum < value) { - maximum = value; - } - } - - @Override - void merge(ColumnStatisticsImpl other) { - if (other instanceof TimestampStatisticsImpl) { - TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other; - if (minimum == null) { - minimum = timestampStats.minimum; - maximum = timestampStats.maximum; - } else if (timestampStats.minimum != null) { - if (minimum > timestampStats.minimum) { - minimum = timestampStats.minimum; - } - if (maximum < timestampStats.maximum) { - maximum = timestampStats.maximum; - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of timestamp column statistics"); - } - } - super.merge(other); - } - - @Override - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics - .newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - timestampStats.setMinimum(minimum); - timestampStats.setMaximum(maximum); - } - result.setTimestampStatistics(timestampStats); - return result; - } - - @Override - public Timestamp getMinimum() { - return minimum == null ? null : new Timestamp(minimum); - } - - @Override - public Timestamp getMaximum() { - return maximum == null ? 
null : new Timestamp(maximum); - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - } - return buf.toString(); - } - } - - private long count = 0; - private boolean hasNull = false; - - ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) { - if (stats.hasNumberOfValues()) { - count = stats.getNumberOfValues(); - } - - if (stats.hasHasNull()) { - hasNull = stats.getHasNull(); - } else { - hasNull = true; - } - } - - ColumnStatisticsImpl() { - } - - void increment() { - count += 1; - } - - void increment(int count) { - this.count += count; - } - - void setNull() { - hasNull = true; - } - - void updateBoolean(boolean value, int repetitions) { - throw new UnsupportedOperationException("Can't update boolean"); - } - - void updateInteger(long value, int repetitions) { - throw new UnsupportedOperationException("Can't update integer"); - } - - void updateDouble(double value) { - throw new UnsupportedOperationException("Can't update double"); - } - - void updateString(Text value) { - throw new UnsupportedOperationException("Can't update string"); - } - - void updateString(byte[] bytes, int offset, int length, int repetitions) { - throw new UnsupportedOperationException("Can't update string"); - } - - void updateBinary(BytesWritable value) { - throw new UnsupportedOperationException("Can't update binary"); - } - - void updateBinary(byte[] bytes, int offset, int length, int repetitions) { - throw new UnsupportedOperationException("Can't update string"); - } - - void updateDecimal(HiveDecimal value) { - throw new UnsupportedOperationException("Can't update decimal"); - } - - void updateDate(DateWritable value) { - throw new UnsupportedOperationException("Can't update date"); - } - - void updateDate(int value) { - throw new UnsupportedOperationException("Can't update date"); - } - - void updateTimestamp(Timestamp value) { - throw new UnsupportedOperationException("Can't update timestamp"); - } - - void updateTimestamp(long value) { - throw new UnsupportedOperationException("Can't update timestamp"); - } - - boolean isStatsExists() { - return (count > 0 || hasNull == true); - } - - void merge(ColumnStatisticsImpl stats) { - count += stats.count; - hasNull |= stats.hasNull; - } - - void reset() { - count = 0; - hasNull = false; - } - - @Override - public long getNumberOfValues() { - return count; - } - - @Override - public boolean hasNull() { - return hasNull; - } - - @Override - public String toString() { - return "count: " + count + " hasNull: " + hasNull; - } - - OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = - OrcProto.ColumnStatistics.newBuilder(); - builder.setNumberOfValues(count); - builder.setHasNull(hasNull); - return builder; - } - - static ColumnStatisticsImpl create(TypeDescription schema) { - switch (schema.getCategory()) { - case BOOLEAN: - return new BooleanStatisticsImpl(); - case BYTE: - case SHORT: - case INT: - case LONG: - return new IntegerStatisticsImpl(); - case FLOAT: - case DOUBLE: - return new DoubleStatisticsImpl(); - case STRING: - case CHAR: - case VARCHAR: - return new StringStatisticsImpl(); - case DECIMAL: - return new DecimalStatisticsImpl(); - case DATE: - return new DateStatisticsImpl(); - case TIMESTAMP: - return new TimestampStatisticsImpl(); - case BINARY: - return new BinaryStatisticsImpl(); - default: - return new 
ColumnStatisticsImpl(); - } - } - - static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) { - if (stats.hasBucketStatistics()) { - return new BooleanStatisticsImpl(stats); - } else if (stats.hasIntStatistics()) { - return new IntegerStatisticsImpl(stats); - } else if (stats.hasDoubleStatistics()) { - return new DoubleStatisticsImpl(stats); - } else if (stats.hasStringStatistics()) { - return new StringStatisticsImpl(stats); - } else if (stats.hasDecimalStatistics()) { - return new DecimalStatisticsImpl(stats); - } else if (stats.hasDateStatistics()) { - return new DateStatisticsImpl(stats); - } else if (stats.hasTimestampStatistics()) { - return new TimestampStatisticsImpl(stats); - } else if(stats.hasBinaryStatistics()) { - return new BinaryStatisticsImpl(stats); - } else { - return new ColumnStatisticsImpl(stats); - } - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java deleted file mode 100644 index ed9d7ac..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.EnumSet; - -import javax.annotation.Nullable; - -public interface CompressionCodec { - - public enum Modifier { - /* speed/compression tradeoffs */ - FASTEST, - FAST, - DEFAULT, - /* data sensitivity modifiers */ - TEXT, - BINARY - }; - - /** - * Compress the in buffer to the out buffer. - * @param in the bytes to compress - * @param out the uncompressed bytes - * @param overflow put any additional bytes here - * @return true if the output is smaller than input - * @throws IOException - */ - boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow - ) throws IOException; - - /** - * Decompress the in buffer to the out buffer. - * @param in the bytes to decompress - * @param out the decompressed bytes - * @throws IOException - */ - void decompress(ByteBuffer in, ByteBuffer out) throws IOException; - - /** - * Produce a modified compression codec if the underlying algorithm allows - * modification. - * - * This does not modify the current object, but returns a new object if - * modifications are possible. Returns the same object if no modifications - * are possible. 
-   * @param modifiers compression modifiers
-   * @return codec for use after optional modification
-   */
-  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
index 07c6116..22627df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
@@ -20,8 +20,22 @@ package org.apache.hadoop.hive.ql.io.orc;
 
 /**
  * An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
+ * can be applied to ORC files. This is a shim to help users while we
+ * migrate to the org.apache.orc package.
  */
 public enum CompressionKind {
-  NONE, ZLIB, SNAPPY, LZO
+  NONE(org.apache.orc.CompressionKind.NONE),
+  ZLIB(org.apache.orc.CompressionKind.ZLIB),
+  SNAPPY(org.apache.orc.CompressionKind.SNAPPY),
+  LZO(org.apache.orc.CompressionKind.LZO);
+
+  CompressionKind(org.apache.orc.CompressionKind underlying) {
+    this.underlying = underlying;
+  }
+
+  public org.apache.orc.CompressionKind getUnderlying() {
+    return underlying;
+  }
+
+  private final org.apache.orc.CompressionKind underlying;
 }
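
The CompressionKind hunk above shows the shim pattern this commit uses during the migration: the old Hive enum now wraps its org.apache.orc counterpart instead of standing alone. A small illustrative bridge between the two packages (the helper class is hypothetical; only the two enums and getUnderlying() come from the patch):

import org.apache.hadoop.hive.ql.io.orc.CompressionKind;

class CompressionKindBridge {
  // Hive shim enum -> underlying org.apache.orc enum, via the new accessor.
  static org.apache.orc.CompressionKind toOrc(CompressionKind hiveKind) {
    return hiveKind.getUnderlying();
  }

  // org.apache.orc enum -> Hive shim enum; valueOf works because the
  // constant names (NONE, ZLIB, SNAPPY, LZO) are kept identical.
  static CompressionKind fromOrc(org.apache.orc.CompressionKind orcKind) {
    return CompressionKind.valueOf(orcKind.name());
  }
}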

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
deleted file mode 100644
index e0d9943..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DataReader.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-
-/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
-public interface DataReader {
-
-  /** Opens the DataReader, making it ready to use. */
-  void open() throws IOException;
-
-  /** Closes the DataReader. */
-  void close() throws IOException;
-
-  /** Reads the data.
-   *
-   * Note that for the cases such as zero-copy read, caller must release the disk ranges
-   * produced after being done with them. Call isTrackingDiskRanges to find out if this is needed.
-   * @param range List of disk ranges to read. Ranges with data will be ignored.
-   * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
-   * @param doForceDirect Whether the data should be read into direct buffers.
-   * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
-   */
-  DiskRangeList readFileData(
-      DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
-
-
-  /**
-   * Whether the user should release buffers created by readFileData. See readFileData javadoc.
-   */
-  boolean isTrackingDiskRanges();
-
-  /**
-   * Releases buffers created by readFileData. See readFileData javadoc.
-   * @param toRelease The buffer to release.
-   */
-  void releaseBuffer(ByteBuffer toRelease);
-}
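
Since DataReader's javadoc puts the buffer-release burden on the caller, a sketch of the intended call pattern may help. This is illustrative only: the helper below is hypothetical, and the DiskRangeList traversal (the public next field and the getData() accessor) is assumed from the surrounding Hive common-io classes rather than shown in this patch:

import java.io.IOException;
import org.apache.hadoop.hive.common.io.DiskRangeList;

class DataReaderUsageSketch {
  // Hypothetical caller honoring the release contract readFileData describes.
  static void readThenRelease(DataReader reader, DiskRangeList ranges,
                              long baseOffset) throws IOException {
    DiskRangeList filled = reader.readFileData(ranges, baseOffset, false);
    try {
      // ... consume the buffers in 'filled' ...
    } finally {
      if (reader.isTrackingDiskRanges()) {       // zero-copy style readers only
        for (DiskRangeList r = filled; r != null; r = r.next) {
          reader.releaseBuffer(r.getData());     // assumed accessor
        }
      }
    }
  }
}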

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
deleted file mode 100644
index ae3fe31..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Date getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Date getMaximum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
deleted file mode 100644
index ec6aa43..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the minimum value for the column.
-   * @return the minimum value
-   */
-  HiveDecimal getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return the maximum value
-   */
-  HiveDecimal getMaximum();
-
-  /**
-   * Get the sum of the values of the column.
-   * @return the sum
-   */
-  HiveDecimal getSum();
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
deleted file mode 100644
index 41a77b0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DirectDecompressionCodec.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public interface DirectDecompressionCodec extends CompressionCodec {
-  public boolean isAvailable();
-  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
deleted file mode 100644
index 6af7535..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  double getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  double getMaximum();
-
-  /**
-   * Get the sum of the values in the column.
-   * @return the sum
-   */
-  double getSum();
-}
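Note the caveat in the javadoc: minimum and maximum are only defined when getNumberOfValues() is non-zero, so consumers should guard on the count before trusting them. An illustrative loop against the public API, assuming a Reader already opened via OrcFile.createReader (sketch only):

    ColumnStatistics[] stats = reader.getStatistics();
    for (ColumnStatistics cs : stats) {
      if (cs instanceof DoubleColumnStatistics && cs.getNumberOfValues() > 0) {
        DoubleColumnStatistics dcs = (DoubleColumnStatistics) cs;
        System.out.println("min=" + dcs.getMinimum()
            + " max=" + dcs.getMaximum() + " sum=" + dcs.getSum());
      }
    }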

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
deleted file mode 100644
index 063c53c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that is a growable array of bytes. Growth is managed in terms of
- * chunks that are allocated when needed.
- */
-public final class DynamicByteArray {
-  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
-  static final int DEFAULT_NUM_CHUNKS = 128;
-
-  private final int chunkSize;            // our allocation sizes
-  private byte[][] data;                  // the real data
-  private int length;                     // max set element index +1
-  private int initializedChunks = 0;      // the number of chunks created
-
-  public DynamicByteArray() {
-    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicByteArray(int numChunks, int chunkSize) {
-    if (chunkSize == 0) {
-      throw new IllegalArgumentException("bad chunksize");
-    }
-    this.chunkSize = chunkSize;
-    data = new byte[numChunks][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        byte[][] newChunk = new byte[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for(int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new byte[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public byte get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-          " is outside of 0.." +
-          (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, byte value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public int add(byte value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    int result = length;
-    length += 1;
-    return result;
-  }
-
-  /**
-   * Copy a slice of a byte array into our buffer.
-   * @param value the array to copy from
-   * @param valueOffset the first location to copy from value
-   * @param valueLength the number of bytes to copy from value
-   * @return the offset of the start of the value
-   */
-  public int add(byte[] value, int valueOffset, int valueLength) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow((length + valueLength) / chunkSize);
-    int remaining = valueLength;
-    while (remaining > 0) {
-      int size = Math.min(remaining, chunkSize - j);
-      System.arraycopy(value, valueOffset, data[i], j, size);
-      remaining -= size;
-      valueOffset += size;
-      i += 1;
-      j = 0;
-    }
-    int result = length;
-    length += valueLength;
-    return result;
-  }
-
-  /**
-   * Read the entire stream into this array.
-   * @param in the stream to read from
-   * @throws IOException
-   */
-  public void readAll(InputStream in) throws IOException {
-    int currentChunk = length / chunkSize;
-    int currentOffset = length % chunkSize;
-    grow(currentChunk);
-    int currentLength = in.read(data[currentChunk], currentOffset,
-        chunkSize - currentOffset);
-    while (currentLength > 0) {
-      length += currentLength;
-      currentOffset = length % chunkSize;
-      if (currentOffset == 0) {
-        currentChunk = length / chunkSize;
-        grow(currentChunk);
-      }
-      currentLength = in.read(data[currentChunk], currentOffset,
-          chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Byte compare a set of bytes against the bytes in this dynamic array.
-   * @param other source of the other bytes
-   * @param otherOffset start offset in the other array
-   * @param otherLength number of bytes in the other array
-   * @param ourOffset the offset in our array
-   * @param ourLength the number of bytes in our array
-   * @return negative for less, 0 for equal, positive for greater
-   */
-  public int compare(byte[] other, int otherOffset, int otherLength,
-                     int ourOffset, int ourLength) {
-    int currentChunk = ourOffset / chunkSize;
-    int currentOffset = ourOffset % chunkSize;
-    int maxLength = Math.min(otherLength, ourLength);
-    while (maxLength > 0 &&
-        other[otherOffset] == data[currentChunk][currentOffset]) {
-      otherOffset += 1;
-      currentOffset += 1;
-      if (currentOffset == chunkSize) {
-        currentChunk += 1;
-        currentOffset = 0;
-      }
-      maxLength -= 1;
-    }
-    if (maxLength == 0) {
-      return otherLength - ourLength;
-    }
-    int otherByte = 0xff & other[otherOffset];
-    int ourByte = 0xff & data[currentChunk][currentOffset];
-    return otherByte > ourByte ? 1 : -1;
-  }
-
-  /**
-   * Get the size of the array.
-   * @return the number of bytes in the array
-   */
-  public int size() {
-    return length;
-  }
-
-  /**
-   * Clear the array to its original pristine state.
-   */
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  /**
-   * Set a text value from the bytes in this dynamic array.
-   * @param result the value to set
-   * @param offset the start of the bytes to copy
-   * @param length the number of bytes to copy
-   */
-  public void setText(Text result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.append(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Write out a range of this dynamic array to an output stream.
-   * @param out the stream to write to
-   * @param offset the first offset to write
-   * @param length the number of bytes to write
-   * @throws IOException
-   */
-  public void write(OutputStream out, int offset,
-                    int length) throws IOException {
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    while (length > 0) {
-      int currentLength = Math.min(length, chunkSize - currentOffset);
-      out.write(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-    }
-  }
-
-  @Override
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 3);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; ++i) {
-      sb.append(Integer.toHexString(get(i)));
-      sb.append(',');
-    }
-    sb.append(Integer.toHexString(get(i)));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public void setByteBuffer(ByteBuffer result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.put(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Gets all the bytes of the array.
-   *
-   * @return Bytes of the array
-   */
-  public byte[] get() {
-    byte[] result = null;
-    if (length > 0) {
-      int currentChunk = 0;
-      int currentOffset = 0;
-      int currentLength = Math.min(length, chunkSize);
-      int destOffset = 0;
-      result = new byte[length];
-      int totalLength = length;
-      while (totalLength > 0) {
-        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
-        destOffset += currentLength;
-        totalLength -= currentLength;
-        currentChunk += 1;
-        currentOffset = 0;
-        currentLength = Math.min(totalLength, chunkSize - currentOffset);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Get the size of the buffers.
-   */
-  public long getSizeInBytes() {
-    return initializedChunks * chunkSize;
-  }
-}
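The class comment above is terse, so a usage sketch may help: because growth allocates fresh chunks instead of relocating existing ones, the int returned by add() is a stable offset into the logical array. Sketch only, not part of this commit:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.io.Text;

    DynamicByteArray bytes = new DynamicByteArray();     // 128 chunks of 32K
    byte[] word = "orc".getBytes(StandardCharsets.UTF_8);
    int start = bytes.add(word, 0, word.length);         // stable start offset
    Text text = new Text();
    bytes.setText(text, start, word.length);             // reads "orc" back out
    // element i lives at data[i / chunkSize][i % chunkSize]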

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
deleted file mode 100644
index 2e884c0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Dynamic int array that uses primitive types and chunks to avoid copying
- * large number of integers when it resizes.
- *
- * The motivation for this class is memory optimization, i.e. space efficient
- * storage of potentially huge arrays without good a-priori size guesses.
- *
- * The API of this class is between a primitive array and a AbstractList. It's
- * not a Collection implementation because it handles primitive types, but the
- * API could be extended to support iterators and the like.
- *
- * NOTE: Like standard Collection implementations/arrays, this class is not
- * synchronized.
- */
-final class DynamicIntArray {
-  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
-  static final int INIT_CHUNKS = 128;
-
-  private final int chunkSize;            // our allocation size
-  private int[][] data;                   // the real data
-  private int length;                     // max set element index +1
-  private int initializedChunks = 0;      // the number of created chunks
-
-  public DynamicIntArray() {
-    this(DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicIntArray(int chunkSize) {
-    this.chunkSize = chunkSize;
-
-    data = new int[INIT_CHUNKS][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        int[][] newChunk = new int[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for (int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new int[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public int get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-          " is outside of 0.." +
-          (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public void increment(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] += value;
-  }
-
-  public void add(int value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    length += 1;
-  }
-
-  public int size() {
-    return length;
-  }
-
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 4);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; ++i) {
-      sb.append(get(i));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public int getSizeInBytes() {
-    return 4 * initializedChunks * chunkSize;
-  }
-}
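As with DynamicByteArray, the point of the chunking described in the class comment is that growth allocates new chunks rather than copying old ones. A quick sketch of the "between a primitive array and an AbstractList" API (illustrative only):

    DynamicIntArray offsets = new DynamicIntArray();  // 8K-int chunks
    for (int i = 0; i < 100000; i++) {
      offsets.add(i * 3);          // the 100,000th add copies nothing
    }
    offsets.increment(42, 1);      // in-place read-modify-write
    int v = offsets.get(42);       // 127, addressed via / and % chunkSize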

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
       if ((rowGroupIndex == null) || (col >= rowGroupIndex.length)
           || ((index = rowGroupIndex[col]) == null)) {
@@ -661,7 +667,7 @@ public final class FileDump {
     for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
       buf.append("\n    Entry ").append(entryIx).append(": ");
-      RowIndexEntry entry = index.getEntry(entryIx);
+      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
       if (entry == null) {
         buf.append("unknown\n");
         continue;
@@ -686,7 +692,7 @@ public final class FileDump {
   public static long getTotalPaddingSize(Reader reader) throws IOException {
     long paddedBytes = 0;
-    List<StripeInformation> stripes = reader.getStripes();
+    List<org.apache.orc.StripeInformation> stripes = reader.getStripes();
     for (int i = 1; i < stripes.size(); i++) {
       long prevStripeOffset = stripes.get(i - 1).getOffset();
      long prevStripeLen = stripes.get(i - 1).getLength();

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
deleted file mode 100644
index 95c674e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
-  ByteBuffer footerMetaAndPsBuffer;
-  final String compressionType;
-  final int bufferSize;
-  final int metadataSize;
-  final ByteBuffer footerBuffer;
-  final List<Integer> versionList;
-  final OrcFile.WriterVersion writerVersion;
-
-
-  /** Ctor used when reading splits - no version list or full footer buffer. */
-  FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-      ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
-    this(compressionType, bufferSize, metadataSize, footerBuffer, null,
-        writerVersion, null);
-  }
-
-  /** Ctor used when creating file info during init and when getting a new one. */
-  public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-      ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
-      ByteBuffer fullFooterBuffer) {
-    this.compressionType = compressionType;
-    this.bufferSize = bufferSize;
-    this.metadataSize = metadataSize;
-    this.footerBuffer = footerBuffer;
-    this.versionList = versionList;
-    this.writerVersion = writerVersion;
-    this.footerMetaAndPsBuffer = fullFooterBuffer;
-  }
-
-  public OrcFile.WriterVersion getWriterVersion() {
-    return writerVersion;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6c759045/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
deleted file mode 100644
index 26b27a3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileMetadata.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.orc.OrcProto;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-
-/**
- * Cached file metadata. Right now, it caches everything; we don't have to store all the
- * protobuf structs actually, we could just store what we need, but that would require that
- * ORC stop depending on them too. Luckily, they shouldn't be very big.
- */
-public interface FileMetadata {
-  boolean isOriginalFormat();
-
-  List<StripeInformation> getStripes();
-
-  CompressionKind getCompressionKind();
-
-  int getCompressionBufferSize();
-
-  int getRowIndexStride();
-
-  int getColumnCount();
-
-  int getFlattenedColumnCount();
-
-  long getFileId();
-
-  List<Integer> getVersionList();
-
-  int getMetadataSize();
-
-  int getWriterVersionNum();
-
-  List<Type> getTypes();
-
-  List<OrcProto.StripeStatistics> getStripeStats();
-
-  long getContentLength();
-
-  long getNumberOfRows();
-
-  List<OrcProto.ColumnStatistics> getFileStats();
-}
\ No newline at end of file
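FileMetadata exposes the same stripe-level view that the FileDump hunk above reads off an open Reader, so a metadata-cache consumer could compute the padding figure without reopening the file. A hypothetical sketch (the helper and its meta parameter are illustrative; only the interface methods above are from the source):

    long totalPaddingSize(FileMetadata meta) {
      long paddedBytes = 0;
      List<StripeInformation> stripes = meta.getStripes();
      for (int i = 1; i < stripes.size(); i++) {
        StripeInformation prev = stripes.get(i - 1);
        // gap between the end of the previous stripe and this one's start
        paddedBytes += stripes.get(i).getOffset()
            - (prev.getOffset() + prev.getLength());
      }
      return paddedBytes;
    }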