Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E640718D43 for ; Sun, 21 Jun 2015 05:25:38 +0000 (UTC) Received: (qmail 82245 invoked by uid 500); 21 Jun 2015 05:25:28 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 82085 invoked by uid 500); 21 Jun 2015 05:25:28 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 79128 invoked by uid 99); 21 Jun 2015 05:25:26 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 21 Jun 2015 05:25:26 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D6D97E3619; Sun, 21 Jun 2015 05:25:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: xuefu@apache.org To: commits@hive.apache.org Date: Sun, 21 Jun 2015 05:26:12 -0000 Message-Id: In-Reply-To: <8b5ddf5e238a45498fc293d82199240e@git.apache.org> References: <8b5ddf5e238a45498fc293d82199240e@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [47/61] [abbrv] hive git commit: HIVE-11031: ORC concatenation of old files can fail while merging column statistics (Prasanth Jayachandran reviewed by Gopal V) HIVE-11031: ORC concatenation of old files can fail while merging column statistics (Prasanth Jayachandran reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f8b0ef8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f8b0ef8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f8b0ef8 Branch: refs/heads/spark Commit: 3f8b0ef87dfb374038c7170dc8f94c52974872ca Parents: 8ef6e68 Author: Prasanth Jayachandran Authored: Thu Jun 18 07:35:54 2015 -0700 Committer: Prasanth Jayachandran Committed: Thu Jun 18 07:35:54 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/OrcFileMergeOperator.java | 70 +++--- .../hive/ql/io/orc/ColumnStatisticsImpl.java | 217 ++++++++++++------- .../hive/ql/io/orc/OrcFileKeyWrapper.java | 27 ++- .../io/orc/OrcFileStripeMergeRecordReader.java | 22 +- ql/src/test/queries/clientpositive/orc_merge9.q | 44 ++++ .../results/clientpositive/orc_merge9.q.out | 186 ++++++++++++++++ 6 files changed, 434 insertions(+), 132 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index 866f7c0..470c4e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.ql.exec; +import java.io.IOException; + +import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; @@ -32,8 +35,6 @@ import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.shims.CombineHiveKey; -import java.io.IOException; - /** * Fast file merge operator for ORC files. */ @@ -72,6 +73,14 @@ public class OrcFileMergeOperator extends } else { k = (OrcFileKeyWrapper) key; } + + // skip incompatible file, files that are missing stripe statistics are set to incompatible + if (k.isIncompatFile()) { + LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); + incompatFileSet.add(k.getInputPath()); + return; + } + filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); @@ -81,9 +90,9 @@ public class OrcFileMergeOperator extends if (prevPath == null) { prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); - if (isLogInfoEnabled) { - LOG.info("ORC merge file input path: " + k.getInputPath()); - } + if (isLogInfoEnabled) { + LOG.info("ORC merge file input path: " + k.getInputPath()); + } } // store the orc configuration from the first file. All other files should @@ -102,9 +111,9 @@ public class OrcFileMergeOperator extends .version(version) .rowIndexStride(rowIndexStride) .inspector(reader.getObjectInspector())); - if (isLogDebugEnabled) { - LOG.info("ORC merge file output path: " + outPath); - } + if (isLogDebugEnabled) { + LOG.info("ORC merge file output path: " + outPath); + } } if (!checkCompatibility(k)) { @@ -128,9 +137,10 @@ public class OrcFileMergeOperator extends v.getStripeStatistics()); if (isLogInfoEnabled) { - LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " - + v.getStripeInformation().getOffset() + " length: " - + v.getStripeInformation().getLength() + " ]"); + LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + + v.getStripeInformation().getOffset() + " length: " + + v.getStripeInformation().getLength() + " row: " + + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]"); } // add user metadata to footer in case of any @@ -139,9 +149,12 @@ public class OrcFileMergeOperator extends } } catch (Throwable e) { this.exception = true; - closeOp(true); + LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e)); throw new HiveException(e); } finally { + if (exception) { + closeOp(true); + } if (fdis != null) { try { fdis.close(); @@ -157,43 +170,28 @@ public class OrcFileMergeOperator extends private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { - if (isLogInfoEnabled) { - LOG.info("Incompatible ORC file merge! Column counts does not match for " - + k.getInputPath()); - } + LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { - if (isLogInfoEnabled) { - LOG.info("Incompatible ORC file merge! Compression codec does not match" + - " for " + k.getInputPath()); - } + LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { - if (isLogInfoEnabled) { - LOG.info("Incompatible ORC file merge! Compression buffer size does not" + - " match for " + k.getInputPath()); - } + LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { - if (isLogInfoEnabled) { - LOG.info("Incompatible ORC file merge! Version does not match for " - + k.getInputPath()); - } + LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { - if (isLogInfoEnabled) { - LOG.info("Incompatible ORC file merge! Row index stride does not match" + - " for " + k.getInputPath()); - } + LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } @@ -232,7 +230,7 @@ public class OrcFileMergeOperator extends outWriter.close(); outWriter = null; - } catch (IOException e) { + } catch (Exception e) { throw new HiveException("Unable to close OrcFileMergeOperator", e); } super.closeOp(abort); http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java index ffba3c6..15a3e2c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java @@ -57,9 +57,15 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { + if (other instanceof BooleanStatisticsImpl) { + BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other; + trueCount += bkt.trueCount; + } else { + if (isStatsExists() && trueCount != 0) { + throw new IllegalArgumentException("Incompatible merging of boolean column statistics"); + } + } super.merge(other); - BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other; - trueCount += bkt.trueCount; } @Override @@ -149,28 +155,35 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = otherInt.hasMinimum; - minimum = otherInt.minimum; - maximum = otherInt.maximum; - } else if (otherInt.hasMinimum) { - if (otherInt.minimum < minimum) { + if (other instanceof IntegerStatisticsImpl) { + IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other; + if (!hasMinimum) { + hasMinimum = otherInt.hasMinimum; minimum = otherInt.minimum; - } - if (otherInt.maximum > maximum) { maximum = otherInt.maximum; + } else if (otherInt.hasMinimum) { + if (otherInt.minimum < minimum) { + minimum = otherInt.minimum; + } + if (otherInt.maximum > maximum) { + maximum = otherInt.maximum; + } } - } - super.merge(other); - overflow |= otherInt.overflow; - if (!overflow) { - boolean wasPositive = sum >= 0; - sum += otherInt.sum; - if ((otherInt.sum >= 0) == wasPositive) { - overflow = (sum >= 0) != wasPositive; + + overflow |= otherInt.overflow; + if (!overflow) { + boolean wasPositive = sum >= 0; + sum += otherInt.sum; + if ((otherInt.sum >= 0) == wasPositive) { + overflow = (sum >= 0) != wasPositive; + } + } + } else { + if (isStatsExists() && hasMinimum) { + throw new IllegalArgumentException("Incompatible merging of integer column statistics"); } } + super.merge(other); } @Override @@ -276,21 +289,27 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - super.merge(other); - DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = dbl.hasMinimum; - minimum = dbl.minimum; - maximum = dbl.maximum; - } else if (dbl.hasMinimum) { - if (dbl.minimum < minimum) { + if (other instanceof DoubleStatisticsImpl) { + DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other; + if (!hasMinimum) { + hasMinimum = dbl.hasMinimum; minimum = dbl.minimum; - } - if (dbl.maximum > maximum) { maximum = dbl.maximum; + } else if (dbl.hasMinimum) { + if (dbl.minimum < minimum) { + minimum = dbl.minimum; + } + if (dbl.maximum > maximum) { + maximum = dbl.maximum; + } + } + sum += dbl.sum; + } else { + if (isStatsExists() && hasMinimum) { + throw new IllegalArgumentException("Incompatible merging of double column statistics"); } } - sum += dbl.sum; + super.merge(other); } @Override @@ -382,25 +401,31 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - super.merge(other); - StringStatisticsImpl str = (StringStatisticsImpl) other; - if (minimum == null) { - if(str.minimum != null) { - maximum = new Text(str.getMaximum()); - minimum = new Text(str.getMinimum()); - } else { + if (other instanceof StringStatisticsImpl) { + StringStatisticsImpl str = (StringStatisticsImpl) other; + if (minimum == null) { + if (str.minimum != null) { + maximum = new Text(str.getMaximum()); + minimum = new Text(str.getMinimum()); + } else { /* both are empty */ - maximum = minimum = null; + maximum = minimum = null; + } + } else if (str.minimum != null) { + if (minimum.compareTo(str.minimum) > 0) { + minimum = new Text(str.getMinimum()); + } + if (maximum.compareTo(str.maximum) < 0) { + maximum = new Text(str.getMaximum()); + } } - } else if (str.minimum != null) { - if (minimum.compareTo(str.minimum) > 0) { - minimum = new Text(str.getMinimum()); - } - if (maximum.compareTo(str.maximum) < 0) { - maximum = new Text(str.getMaximum()); + sum += str.sum; + } else { + if (isStatsExists() && minimum != null) { + throw new IllegalArgumentException("Incompatible merging of string column statistics"); } } - sum += str.sum; + super.merge(other); } @Override @@ -476,9 +501,15 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { + if (other instanceof BinaryColumnStatistics) { + BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other; + sum += bin.sum; + } else { + if (isStatsExists() && sum != 0) { + throw new IllegalArgumentException("Incompatible merging of binary column statistics"); + } + } super.merge(other); - BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other; - sum += bin.sum; } @Override @@ -556,25 +587,31 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - super.merge(other); - DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other; - if (minimum == null) { - minimum = dec.minimum; - maximum = dec.maximum; - sum = dec.sum; - } else if (dec.minimum != null) { - if (minimum.compareTo(dec.minimum) > 0) { + if (other instanceof DecimalStatisticsImpl) { + DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other; + if (minimum == null) { minimum = dec.minimum; - } - if (maximum.compareTo(dec.maximum) < 0) { maximum = dec.maximum; + sum = dec.sum; + } else if (dec.minimum != null) { + if (minimum.compareTo(dec.minimum) > 0) { + minimum = dec.minimum; + } + if (maximum.compareTo(dec.maximum) < 0) { + maximum = dec.maximum; + } + if (sum == null || dec.sum == null) { + sum = null; + } else { + sum = sum.add(dec.sum); + } } - if (sum == null || dec.sum == null) { - sum = null; - } else { - sum = sum.add(dec.sum); + } else { + if (isStatsExists() && minimum != null) { + throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); } } + super.merge(other); } @Override @@ -582,7 +619,7 @@ class ColumnStatisticsImpl implements ColumnStatistics { OrcProto.ColumnStatistics.Builder result = super.serialize(); OrcProto.DecimalStatistics.Builder dec = OrcProto.DecimalStatistics.newBuilder(); - if (getNumberOfValues() != 0) { + if (getNumberOfValues() != 0 && minimum != null) { dec.setMinimum(minimum.toString()); dec.setMaximum(maximum.toString()); } @@ -666,19 +703,25 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - super.merge(other); - DateStatisticsImpl dateStats = (DateStatisticsImpl) other; - if (minimum == null) { - minimum = dateStats.minimum; - maximum = dateStats.maximum; - } else if (dateStats.minimum != null) { - if (minimum > dateStats.minimum) { + if (other instanceof DateStatisticsImpl) { + DateStatisticsImpl dateStats = (DateStatisticsImpl) other; + if (minimum == null) { minimum = dateStats.minimum; - } - if (maximum < dateStats.maximum) { maximum = dateStats.maximum; + } else if (dateStats.minimum != null) { + if (minimum > dateStats.minimum) { + minimum = dateStats.minimum; + } + if (maximum < dateStats.maximum) { + maximum = dateStats.maximum; + } + } + } else { + if (isStatsExists() && minimum != null) { + throw new IllegalArgumentException("Incompatible merging of date column statistics"); } } + super.merge(other); } @Override @@ -686,7 +729,7 @@ class ColumnStatisticsImpl implements ColumnStatistics { OrcProto.ColumnStatistics.Builder result = super.serialize(); OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder(); - if (getNumberOfValues() != 0) { + if (getNumberOfValues() != 0 && minimum != null) { dateStats.setMinimum(minimum); dateStats.setMaximum(maximum); } @@ -769,19 +812,25 @@ class ColumnStatisticsImpl implements ColumnStatistics { @Override void merge(ColumnStatisticsImpl other) { - super.merge(other); - TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other; - if (minimum == null) { - minimum = timestampStats.minimum; - maximum = timestampStats.maximum; - } else if (timestampStats.minimum != null) { - if (minimum > timestampStats.minimum) { + if (other instanceof TimestampStatisticsImpl) { + TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other; + if (minimum == null) { minimum = timestampStats.minimum; - } - if (maximum < timestampStats.maximum) { maximum = timestampStats.maximum; + } else if (timestampStats.minimum != null) { + if (minimum > timestampStats.minimum) { + minimum = timestampStats.minimum; + } + if (maximum < timestampStats.maximum) { + maximum = timestampStats.maximum; + } + } + } else { + if (isStatsExists() && minimum != null) { + throw new IllegalArgumentException("Incompatible merging of timestamp column statistics"); } } + super.merge(other); } @Override @@ -789,7 +838,7 @@ class ColumnStatisticsImpl implements ColumnStatistics { OrcProto.ColumnStatistics.Builder result = super.serialize(); OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics .newBuilder(); - if (getNumberOfValues() != 0) { + if (getNumberOfValues() != 0 && minimum != null) { timestampStats.setMinimum(minimum); timestampStats.setMaximum(maximum); } @@ -878,6 +927,10 @@ class ColumnStatisticsImpl implements ColumnStatistics { throw new UnsupportedOperationException("Can't update timestamp"); } + boolean isStatsExists() { + return (count > 0 || hasNull == true); + } + void merge(ColumnStatisticsImpl stats) { count += stats.count; hasNull |= stats.hasNull; http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java index 11f05c6..a62fc1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java @@ -18,26 +18,35 @@ package org.apache.hadoop.hive.ql.io.orc; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.WritableComparable; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.WritableComparable; + /** * Key for OrcFileMergeMapper task. Contains orc file related information that * should match before merging two orc files. */ public class OrcFileKeyWrapper implements WritableComparable { - protected Path inputPath; - protected CompressionKind compression; - protected long compressBufferSize; - protected List types; - protected int rowIndexStride; - protected OrcFile.Version version; + private Path inputPath; + private CompressionKind compression; + private long compressBufferSize; + private List types; + private int rowIndexStride; + private OrcFile.Version version; + private boolean isIncompatFile; + + public boolean isIncompatFile() { + return isIncompatFile; + } + + public void setIsIncompatFile(boolean isIncompatFile) { + this.isIncompatFile = isIncompatFile; + } public OrcFile.Version getVersion() { return version; http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java index cf6fa2a..41a97a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java @@ -18,19 +18,18 @@ package org.apache.hadoop.hive.ql.io.orc; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.RecordReader; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; - public class OrcFileStripeMergeRecordReader implements RecordReader { - private final Reader reader; private final Path path; protected Iterator iter; @@ -38,6 +37,7 @@ public class OrcFileStripeMergeRecordReader implements private int stripeIdx; private long start; private long end; + private boolean skipFile; public OrcFileStripeMergeRecordReader(Configuration conf, FileSplit split) throws IOException { path = split.getPath(); @@ -68,11 +68,23 @@ public class OrcFileStripeMergeRecordReader implements @Override public boolean next(OrcFileKeyWrapper key, OrcFileValueWrapper value) throws IOException { + if (skipFile) { + return false; + } return nextStripe(key, value); } protected boolean nextStripe(OrcFileKeyWrapper keyWrapper, OrcFileValueWrapper valueWrapper) throws IOException { + // missing stripe stats (old format). If numRows is 0 then its an empty file and no statistics + // is present. We have to differentiate no stats (empty file) vs missing stats (old format). + if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) { + keyWrapper.setInputPath(path); + keyWrapper.setIsIncompatFile(true); + skipFile = true; + return true; + } + while (iter.hasNext()) { StripeInformation si = iter.next(); http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/test/queries/clientpositive/orc_merge9.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge9.q b/ql/src/test/queries/clientpositive/orc_merge9.q new file mode 100644 index 0000000..010b5a1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_merge9.q @@ -0,0 +1,44 @@ +create table ts_merge ( +userid bigint, +string1 string, +subtype double, +decimal1 decimal(38,18), +ts timestamp +) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge; +load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/ts_merge/; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +select count(*) from ts_merge; +alter table ts_merge concatenate; +select count(*) from ts_merge; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/ts_merge/; + +-- incompatible merge test (stripe statistics missing) + +create table a_merge like alltypesorc; + +insert overwrite table a_merge select * from alltypesorc; +load data local inpath '../../data/files/alltypesorc' into table a_merge; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/; + +select count(*) from a_merge; +alter table a_merge concatenate; +select count(*) from a_merge; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/; + +insert into table a_merge select * from alltypesorc; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/; + +select count(*) from a_merge; +alter table a_merge concatenate; +select count(*) from a_merge; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/; http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/test/results/clientpositive/orc_merge9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/orc_merge9.q.out b/ql/src/test/results/clientpositive/orc_merge9.q.out new file mode 100644 index 0000000..bdf0fd3 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_merge9.q.out @@ -0,0 +1,186 @@ +PREHOOK: query: create table ts_merge ( +userid bigint, +string1 string, +subtype double, +decimal1 decimal(38,18), +ts timestamp +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ts_merge +POSTHOOK: query: create table ts_merge ( +userid bigint, +string1 string, +subtype double, +decimal1 decimal(38,18), +ts timestamp +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ts_merge +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ts_merge +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ts_merge +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ts_merge +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ts_merge +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select count(*) from ts_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@ts_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ts_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ts_merge +#### A masked pattern was here #### +50000 +PREHOOK: query: alter table ts_merge concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@ts_merge +PREHOOK: Output: default@ts_merge +POSTHOOK: query: alter table ts_merge concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@ts_merge +POSTHOOK: Output: default@ts_merge +PREHOOK: query: select count(*) from ts_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@ts_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ts_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ts_merge +#### A masked pattern was here #### +50000 +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: -- incompatible merge test (stripe statistics missing) + +create table a_merge like alltypesorc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a_merge +POSTHOOK: query: -- incompatible merge test (stripe statistics missing) + +create table a_merge like alltypesorc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a_merge +PREHOOK: query: insert overwrite table a_merge select * from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@a_merge +POSTHOOK: query: insert overwrite table a_merge select * from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@a_merge +POSTHOOK: Lineage: a_merge.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: a_merge.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: a_merge.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: a_merge.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: a_merge.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: a_merge.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: a_merge.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: a_merge.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: a_merge.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: a_merge.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: a_merge.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] +POSTHOOK: Lineage: a_merge.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table a_merge +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@a_merge +POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table a_merge +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@a_merge +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select count(*) from a_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@a_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from a_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_merge +#### A masked pattern was here #### +24576 +PREHOOK: query: alter table a_merge concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@a_merge +PREHOOK: Output: default@a_merge +POSTHOOK: query: alter table a_merge concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@a_merge +POSTHOOK: Output: default@a_merge +PREHOOK: query: select count(*) from a_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@a_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from a_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_merge +#### A masked pattern was here #### +24576 +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: insert into table a_merge select * from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@a_merge +POSTHOOK: query: insert into table a_merge select * from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@a_merge +POSTHOOK: Lineage: a_merge.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: a_merge.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: a_merge.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: a_merge.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: a_merge.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: a_merge.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: a_merge.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: a_merge.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: a_merge.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: a_merge.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: a_merge.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] +POSTHOOK: Lineage: a_merge.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +Found 3 items +#### A masked pattern was here #### +PREHOOK: query: select count(*) from a_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@a_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from a_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_merge +#### A masked pattern was here #### +36864 +PREHOOK: query: alter table a_merge concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@a_merge +PREHOOK: Output: default@a_merge +POSTHOOK: query: alter table a_merge concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@a_merge +POSTHOOK: Output: default@a_merge +PREHOOK: query: select count(*) from a_merge +PREHOOK: type: QUERY +PREHOOK: Input: default@a_merge +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from a_merge +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a_merge +#### A masked pattern was here #### +36864 +Found 2 items +#### A masked pattern was here ####