hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject [47/61] [abbrv] hive git commit: HIVE-11031: ORC concatenation of old files can fail while merging column statistics (Prasanth Jayachandran reviewed by Gopal V)
Date Sun, 21 Jun 2015 05:26:12 GMT
HIVE-11031: ORC concatenation of old files can fail while merging column statistics (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f8b0ef8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f8b0ef8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f8b0ef8

Branch: refs/heads/spark
Commit: 3f8b0ef87dfb374038c7170dc8f94c52974872ca
Parents: 8ef6e68
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Thu Jun 18 07:35:54 2015 -0700
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Thu Jun 18 07:35:54 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/OrcFileMergeOperator.java      |  70 +++---
 .../hive/ql/io/orc/ColumnStatisticsImpl.java    | 217 ++++++++++++-------
 .../hive/ql/io/orc/OrcFileKeyWrapper.java       |  27 ++-
 .../io/orc/OrcFileStripeMergeRecordReader.java  |  22 +-
 ql/src/test/queries/clientpositive/orc_merge9.q |  44 ++++
 .../results/clientpositive/orc_merge9.q.out     | 186 ++++++++++++++++
 6 files changed, 434 insertions(+), 132 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
index 866f7c0..470c4e5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hive.ql.exec;
 
+import java.io.IOException;
+
+import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -32,8 +35,6 @@ import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.shims.CombineHiveKey;
 
-import java.io.IOException;
-
 /**
  * Fast file merge operator for ORC files.
  */
@@ -72,6 +73,14 @@ public class OrcFileMergeOperator extends
       } else {
         k = (OrcFileKeyWrapper) key;
       }
+
+      // skip incompatible file, files that are missing stripe statistics are set to incompatible
+      if (k.isIncompatFile()) {
+        LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath());
+        incompatFileSet.add(k.getInputPath());
+        return;
+      }
+
       filePath = k.getInputPath().toUri().getPath();
 
       fixTmpPath(k.getInputPath().getParent());
@@ -81,9 +90,9 @@ public class OrcFileMergeOperator extends
       if (prevPath == null) {
         prevPath = k.getInputPath();
         reader = OrcFile.createReader(fs, k.getInputPath());
-	if (isLogInfoEnabled) {
-	  LOG.info("ORC merge file input path: " + k.getInputPath());
-	}
+        if (isLogInfoEnabled) {
+          LOG.info("ORC merge file input path: " + k.getInputPath());
+        }
       }
 
       // store the orc configuration from the first file. All other files should
@@ -102,9 +111,9 @@ public class OrcFileMergeOperator extends
                 .version(version)
                 .rowIndexStride(rowIndexStride)
                 .inspector(reader.getObjectInspector()));
-	if (isLogDebugEnabled) {
-	  LOG.info("ORC merge file output path: " + outPath);
-	}
+        if (isLogDebugEnabled) {
+          LOG.info("ORC merge file output path: " + outPath);
+        }
       }
 
       if (!checkCompatibility(k)) {
@@ -128,9 +137,10 @@ public class OrcFileMergeOperator extends
           v.getStripeStatistics());
 
       if (isLogInfoEnabled) {
-	LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : "
-	    + v.getStripeInformation().getOffset() + " length: "
-	    + v.getStripeInformation().getLength() + " ]");
+        LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : "
+            + v.getStripeInformation().getOffset() + " length: "
+            + v.getStripeInformation().getLength() + " row: "
+            + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]");
       }
 
       // add user metadata to footer in case of any
@@ -139,9 +149,12 @@ public class OrcFileMergeOperator extends
       }
     } catch (Throwable e) {
       this.exception = true;
-      closeOp(true);
+      LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e));
       throw new HiveException(e);
     } finally {
+      if (exception) {
+        closeOp(true);
+      }
       if (fdis != null) {
         try {
           fdis.close();
@@ -157,43 +170,28 @@ public class OrcFileMergeOperator extends
   private boolean checkCompatibility(OrcFileKeyWrapper k) {
     // check compatibility with subsequent files
     if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
-      if (isLogInfoEnabled) {
-	LOG.info("Incompatible ORC file merge! Column counts does not match for "
-	    + k.getInputPath());
-      }
+      LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath());
       return false;
     }
 
     if (!k.getCompression().equals(compression)) {
-      if (isLogInfoEnabled) {
-	LOG.info("Incompatible ORC file merge! Compression codec does not match" +
-	    " for " + k.getInputPath());
-      }
+      LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath());
       return false;
     }
 
     if (k.getCompressBufferSize() != compressBuffSize) {
-      if (isLogInfoEnabled) {
-	LOG.info("Incompatible ORC file merge! Compression buffer size does not" +
-	    " match for " + k.getInputPath());
-      }
+      LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath());
       return false;
 
     }
 
     if (!k.getVersion().equals(version)) {
-      if (isLogInfoEnabled) {
-	LOG.info("Incompatible ORC file merge! Version does not match for "
-	    + k.getInputPath());
-      }
+      LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath());
       return false;
     }
 
     if (k.getRowIndexStride() != rowIndexStride) {
-      if (isLogInfoEnabled) {
-	LOG.info("Incompatible ORC file merge! Row index stride does not match" +
-	    " for " + k.getInputPath());
-      }
+      LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath());
       return false;
     }
 
@@ -232,7 +230,7 @@ public class OrcFileMergeOperator extends
 
       outWriter.close();
       outWriter = null;
-    } catch (IOException e) {
+    } catch (Exception e) {
       throw new HiveException("Unable to close OrcFileMergeOperator", e);
     }
     super.closeOp(abort);

http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index ffba3c6..15a3e2c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -57,9 +57,15 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BooleanStatisticsImpl) {
+        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+        trueCount += bkt.trueCount;
+      } else {
+        if (isStatsExists() && trueCount != 0) {
+          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
+        }
+      }
       super.merge(other);
-      BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
-      trueCount += bkt.trueCount;
     }
 
     @Override
@@ -149,28 +155,35 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
-      if (!hasMinimum) {
-        hasMinimum = otherInt.hasMinimum;
-        minimum = otherInt.minimum;
-        maximum = otherInt.maximum;
-      } else if (otherInt.hasMinimum) {
-        if (otherInt.minimum < minimum) {
+      if (other instanceof IntegerStatisticsImpl) {
+        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = otherInt.hasMinimum;
           minimum = otherInt.minimum;
-        }
-        if (otherInt.maximum > maximum) {
           maximum = otherInt.maximum;
+        } else if (otherInt.hasMinimum) {
+          if (otherInt.minimum < minimum) {
+            minimum = otherInt.minimum;
+          }
+          if (otherInt.maximum > maximum) {
+            maximum = otherInt.maximum;
+          }
         }
-      }
-      super.merge(other);
-      overflow |= otherInt.overflow;
-      if (!overflow) {
-        boolean wasPositive = sum >= 0;
-        sum += otherInt.sum;
-        if ((otherInt.sum >= 0) == wasPositive) {
-          overflow = (sum >= 0) != wasPositive;
+
+        overflow |= otherInt.overflow;
+        if (!overflow) {
+          boolean wasPositive = sum >= 0;
+          sum += otherInt.sum;
+          if ((otherInt.sum >= 0) == wasPositive) {
+            overflow = (sum >= 0) != wasPositive;
+          }
+        }
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
         }
       }
+      super.merge(other);
     }
 
     @Override
@@ -276,21 +289,27 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      super.merge(other);
-      DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
-      if (!hasMinimum) {
-        hasMinimum = dbl.hasMinimum;
-        minimum = dbl.minimum;
-        maximum = dbl.maximum;
-      } else if (dbl.hasMinimum) {
-        if (dbl.minimum < minimum) {
+      if (other instanceof DoubleStatisticsImpl) {
+        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = dbl.hasMinimum;
           minimum = dbl.minimum;
-        }
-        if (dbl.maximum > maximum) {
           maximum = dbl.maximum;
+        } else if (dbl.hasMinimum) {
+          if (dbl.minimum < minimum) {
+            minimum = dbl.minimum;
+          }
+          if (dbl.maximum > maximum) {
+            maximum = dbl.maximum;
+          }
+        }
+        sum += dbl.sum;
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of double column statistics");
         }
       }
-      sum += dbl.sum;
+      super.merge(other);
     }
 
     @Override
@@ -382,25 +401,31 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      super.merge(other);
-      StringStatisticsImpl str = (StringStatisticsImpl) other;
-      if (minimum == null) {
-        if(str.minimum != null) {
-          maximum = new Text(str.getMaximum());
-          minimum = new Text(str.getMinimum());
-        } else {
+      if (other instanceof StringStatisticsImpl) {
+        StringStatisticsImpl str = (StringStatisticsImpl) other;
+        if (minimum == null) {
+          if (str.minimum != null) {
+            maximum = new Text(str.getMaximum());
+            minimum = new Text(str.getMinimum());
+          } else {
           /* both are empty */
-          maximum = minimum = null;
+            maximum = minimum = null;
+          }
+        } else if (str.minimum != null) {
+          if (minimum.compareTo(str.minimum) > 0) {
+            minimum = new Text(str.getMinimum());
+          }
+          if (maximum.compareTo(str.maximum) < 0) {
+            maximum = new Text(str.getMaximum());
+          }
         }
-      } else if (str.minimum != null) {
-        if (minimum.compareTo(str.minimum) > 0) {
-          minimum = new Text(str.getMinimum());
-        }
-        if (maximum.compareTo(str.maximum) < 0) {
-          maximum = new Text(str.getMaximum());
+        sum += str.sum;
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of string column statistics");
         }
       }
-      sum += str.sum;
+      super.merge(other);
     }
 
     @Override
@@ -476,9 +501,15 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BinaryColumnStatistics) {
+        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+        sum += bin.sum;
+      } else {
+        if (isStatsExists() && sum != 0) {
+          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
+        }
+      }
       super.merge(other);
-      BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
-      sum += bin.sum;
     }
 
     @Override
@@ -556,25 +587,31 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      super.merge(other);
-      DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
-      if (minimum == null) {
-        minimum = dec.minimum;
-        maximum = dec.maximum;
-        sum = dec.sum;
-      } else if (dec.minimum != null) {
-        if (minimum.compareTo(dec.minimum) > 0) {
+      if (other instanceof DecimalStatisticsImpl) {
+        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
+        if (minimum == null) {
           minimum = dec.minimum;
-        }
-        if (maximum.compareTo(dec.maximum) < 0) {
           maximum = dec.maximum;
+          sum = dec.sum;
+        } else if (dec.minimum != null) {
+          if (minimum.compareTo(dec.minimum) > 0) {
+            minimum = dec.minimum;
+          }
+          if (maximum.compareTo(dec.maximum) < 0) {
+            maximum = dec.maximum;
+          }
+          if (sum == null || dec.sum == null) {
+            sum = null;
+          } else {
+            sum = sum.add(dec.sum);
+          }
         }
-        if (sum == null || dec.sum == null) {
-          sum = null;
-        } else {
-          sum = sum.add(dec.sum);
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
         }
       }
+      super.merge(other);
     }
 
     @Override
@@ -582,7 +619,7 @@ class ColumnStatisticsImpl implements ColumnStatistics {
       OrcProto.ColumnStatistics.Builder result = super.serialize();
       OrcProto.DecimalStatistics.Builder dec =
           OrcProto.DecimalStatistics.newBuilder();
-      if (getNumberOfValues() != 0) {
+      if (getNumberOfValues() != 0 && minimum != null) {
         dec.setMinimum(minimum.toString());
         dec.setMaximum(maximum.toString());
       }
@@ -666,19 +703,25 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      super.merge(other);
-      DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
-      if (minimum == null) {
-        minimum = dateStats.minimum;
-        maximum = dateStats.maximum;
-      } else if (dateStats.minimum != null) {
-        if (minimum > dateStats.minimum) {
+      if (other instanceof DateStatisticsImpl) {
+        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
+        if (minimum == null) {
           minimum = dateStats.minimum;
-        }
-        if (maximum < dateStats.maximum) {
           maximum = dateStats.maximum;
+        } else if (dateStats.minimum != null) {
+          if (minimum > dateStats.minimum) {
+            minimum = dateStats.minimum;
+          }
+          if (maximum < dateStats.maximum) {
+            maximum = dateStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of date column statistics");
         }
       }
+      super.merge(other);
     }
 
     @Override
@@ -686,7 +729,7 @@ class ColumnStatisticsImpl implements ColumnStatistics {
       OrcProto.ColumnStatistics.Builder result = super.serialize();
       OrcProto.DateStatistics.Builder dateStats =
           OrcProto.DateStatistics.newBuilder();
-      if (getNumberOfValues() != 0) {
+      if (getNumberOfValues() != 0 && minimum != null) {
         dateStats.setMinimum(minimum);
         dateStats.setMaximum(maximum);
       }
@@ -769,19 +812,25 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     void merge(ColumnStatisticsImpl other) {
-      super.merge(other);
-      TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
-      if (minimum == null) {
-        minimum = timestampStats.minimum;
-        maximum = timestampStats.maximum;
-      } else if (timestampStats.minimum != null) {
-        if (minimum > timestampStats.minimum) {
+      if (other instanceof TimestampStatisticsImpl) {
+        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
+        if (minimum == null) {
           minimum = timestampStats.minimum;
-        }
-        if (maximum < timestampStats.maximum) {
           maximum = timestampStats.maximum;
+        } else if (timestampStats.minimum != null) {
+          if (minimum > timestampStats.minimum) {
+            minimum = timestampStats.minimum;
+          }
+          if (maximum < timestampStats.maximum) {
+            maximum = timestampStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
         }
       }
+      super.merge(other);
     }
 
     @Override
@@ -789,7 +838,7 @@ class ColumnStatisticsImpl implements ColumnStatistics {
       OrcProto.ColumnStatistics.Builder result = super.serialize();
       OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
           .newBuilder();
-      if (getNumberOfValues() != 0) {
+      if (getNumberOfValues() != 0 && minimum != null) {
         timestampStats.setMinimum(minimum);
         timestampStats.setMaximum(maximum);
       }
@@ -878,6 +927,10 @@ class ColumnStatisticsImpl implements ColumnStatistics {
     throw new UnsupportedOperationException("Can't update timestamp");
   }
 
+  boolean isStatsExists() {
+    return (count > 0 || hasNull == true);
+  }
+
   void merge(ColumnStatisticsImpl stats) {
     count += stats.count;
     hasNull |= stats.hasNull;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java
index 11f05c6..a62fc1e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java
@@ -18,26 +18,35 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.WritableComparable;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.List;
 
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.WritableComparable;
+
 /**
  * Key for OrcFileMergeMapper task. Contains orc file related information that
  * should match before merging two orc files.
  */
 public class OrcFileKeyWrapper implements WritableComparable<OrcFileKeyWrapper> {
 
-  protected Path inputPath;
-  protected CompressionKind compression;
-  protected long compressBufferSize;
-  protected List<OrcProto.Type> types;
-  protected int rowIndexStride;
-  protected OrcFile.Version version;
+  private Path inputPath;
+  private CompressionKind compression;
+  private long compressBufferSize;
+  private List<OrcProto.Type> types;
+  private int rowIndexStride;
+  private OrcFile.Version version;
+  private boolean isIncompatFile;
+
+  public boolean isIncompatFile() {
+    return isIncompatFile;
+  }
+
+  public void setIsIncompatFile(boolean isIncompatFile) {
+    this.isIncompatFile = isIncompatFile;
+  }
 
   public OrcFile.Version getVersion() {
     return version;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
index cf6fa2a..41a97a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java
@@ -18,19 +18,18 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.RecordReader;
 
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
 public class OrcFileStripeMergeRecordReader implements
     RecordReader<OrcFileKeyWrapper, OrcFileValueWrapper> {
-
   private final Reader reader;
   private final Path path;
   protected Iterator<StripeInformation> iter;
@@ -38,6 +37,7 @@ public class OrcFileStripeMergeRecordReader implements
   private int stripeIdx;
   private long start;
   private long end;
+  private boolean skipFile;
 
   public OrcFileStripeMergeRecordReader(Configuration conf, FileSplit split) throws IOException {
     path = split.getPath();
@@ -68,11 +68,23 @@ public class OrcFileStripeMergeRecordReader implements
 
   @Override
   public boolean next(OrcFileKeyWrapper key, OrcFileValueWrapper value) throws IOException {
+    if (skipFile) {
+      return false;
+    }
     return nextStripe(key, value);
   }
 
   protected boolean nextStripe(OrcFileKeyWrapper keyWrapper, OrcFileValueWrapper valueWrapper)
       throws IOException {
+    // missing stripe stats (old format). If numRows is 0 then its an empty file and no statistics
+    // is present. We have to differentiate no stats (empty file) vs missing stats (old format).
+    if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
+      keyWrapper.setInputPath(path);
+      keyWrapper.setIsIncompatFile(true);
+      skipFile = true;
+      return true;
+    }
+
     while (iter.hasNext()) {
       StripeInformation si = iter.next();
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/test/queries/clientpositive/orc_merge9.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_merge9.q b/ql/src/test/queries/clientpositive/orc_merge9.q
new file mode 100644
index 0000000..010b5a1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_merge9.q
@@ -0,0 +1,44 @@
+create table ts_merge (
+userid bigint,
+string1 string,
+subtype double,
+decimal1 decimal(38,18),
+ts timestamp
+) stored as orc;
+
+load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge;
+load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/ts_merge/;
+
+set hive.merge.orcfile.stripe.level=true;
+set hive.merge.tezfiles=true;
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+select count(*) from ts_merge;
+alter table ts_merge concatenate;
+select count(*) from ts_merge;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/ts_merge/;
+
+-- incompatible merge test (stripe statistics missing)
+
+create table a_merge like alltypesorc;
+
+insert overwrite table a_merge select * from alltypesorc;
+load data local inpath '../../data/files/alltypesorc' into table a_merge;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/;
+
+select count(*) from a_merge;
+alter table a_merge concatenate;
+select count(*) from a_merge;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/;
+
+insert into table a_merge select * from alltypesorc;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/;
+
+select count(*) from a_merge;
+alter table a_merge concatenate;
+select count(*) from a_merge;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/a_merge/;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f8b0ef8/ql/src/test/results/clientpositive/orc_merge9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge9.q.out b/ql/src/test/results/clientpositive/orc_merge9.q.out
new file mode 100644
index 0000000..bdf0fd3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_merge9.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: create table ts_merge (
+userid bigint,
+string1 string,
+subtype double,
+decimal1 decimal(38,18),
+ts timestamp
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ts_merge
+POSTHOOK: query: create table ts_merge (
+userid bigint,
+string1 string,
+subtype double,
+decimal1 decimal(38,18),
+ts timestamp
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ts_merge
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@ts_merge
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table ts_merge
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@ts_merge
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@ts_merge
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table ts_merge
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@ts_merge
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from ts_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ts_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ts_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ts_merge
+#### A masked pattern was here ####
+50000
+PREHOOK: query: alter table ts_merge concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@ts_merge
+PREHOOK: Output: default@ts_merge
+POSTHOOK: query: alter table ts_merge concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@ts_merge
+POSTHOOK: Output: default@ts_merge
+PREHOOK: query: select count(*) from ts_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ts_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ts_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ts_merge
+#### A masked pattern was here ####
+50000
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- incompatible merge test (stripe statistics missing)
+
+create table a_merge like alltypesorc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: -- incompatible merge test (stripe statistics missing)
+
+create table a_merge like alltypesorc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a_merge
+PREHOOK: query: insert overwrite table a_merge select * from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: insert overwrite table a_merge select * from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@a_merge
+POSTHOOK: Lineage: a_merge.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: a_merge.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: a_merge.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: a_merge.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: a_merge.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: a_merge.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: a_merge.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: a_merge.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: a_merge.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table a_merge
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table a_merge
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@a_merge
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from a_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from a_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+24576
+PREHOOK: query: alter table a_merge concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@a_merge
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: alter table a_merge concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@a_merge
+POSTHOOK: Output: default@a_merge
+PREHOOK: query: select count(*) from a_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from a_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+24576
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: insert into table a_merge select * from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: insert into table a_merge select * from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@a_merge
+POSTHOOK: Lineage: a_merge.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: a_merge.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: a_merge.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: a_merge.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: a_merge.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: a_merge.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: a_merge.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: a_merge.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: a_merge.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: a_merge.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+Found 3 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from a_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from a_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+36864
+PREHOOK: query: alter table a_merge concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@a_merge
+PREHOOK: Output: default@a_merge
+POSTHOOK: query: alter table a_merge concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@a_merge
+POSTHOOK: Output: default@a_merge
+PREHOOK: query: select count(*) from a_merge
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from a_merge
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a_merge
+#### A masked pattern was here ####
+36864
+Found 2 items
+#### A masked pattern was here ####


Mime
View raw message