From: weiz@apache.org
To: commits@hive.apache.org
Subject: hive git commit: HIVE-13040 : Handle empty bucket creations more efficiently (Ashutosh Chauhan, reviewed by Prasanth Jayachandran)
Date: Thu, 14 Jul 2016 22:09:02 +0000 (UTC)

Repository: hive
Updated Branches:
  refs/heads/branch-1 8f500f8ad -> 3e51861a2


HIVE-13040 : Handle empty bucket creations more efficiently (Ashutosh Chauhan, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e51861a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e51861a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e51861a

Branch: refs/heads/branch-1
Commit: 3e51861a215f62e842489f584a87b5be96316a41
Parents: 8f500f8
Author: Wei Zheng
Authored: Thu Jul 14 15:09:48 2016 -0700
Committer: Wei Zheng
Committed: Thu Jul 14 15:09:48 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/StatsNoJobTask.java      | 67 ++++++++++---------
 .../apache/hadoop/hive/ql/exec/Utilities.java    |  5 +-
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java  | 17 +++--
 .../apache/hadoop/hive/ql/io/orc/OrcFile.java    |  8 +++
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java    | 16 +++--
 .../hadoop/hive/ql/io/orc/OrcOutputFormat.java   | 17 ++---
 .../hive/ql/txn/compactor/CompactorMR.java       |  2 +-
 .../hadoop/hive/ql/txn/compactor/Initiator.java  |  2 +-
 .../hive/ql/io/orc/TestInputOutputFormat.java    | 10 +--
 .../dynpart_sort_opt_vectorization.q.out         |  4 +-
 .../tez/dynpart_sort_opt_vectorization.q.out     |  8 +--
 .../tez/dynpart_sort_optimization.q.out          | 70 +++++++++----------
 .../apache/hadoop/hive/shims/Hadoop23Shims.java  |  2 +-
 13 files changed, 121 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
----------------------------------------------------------------------
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index 0d99cbc..fe49e15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -123,7 +123,7 @@ public class StatsNoJobTask extends Task implements Serializable class StatsCollection implements Runnable { - private Partition partn; + private final Partition partn; public StatsCollection(Partition part) { this.partn = part; @@ -148,7 +148,7 @@ public class StatsNoJobTask extends Task implements Serializable boolean statsAvailable = false; for(FileStatus file: fileList) { if (!file.isDir()) { - InputFormat inputFormat = (InputFormat) ReflectionUtil.newInstance( + InputFormat inputFormat = ReflectionUtil.newInstance( partn.getInputFormatClass(), jc); InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { partn.getLocation() }); @@ -193,7 +193,7 @@ public class StatsNoJobTask extends Task implements Serializable "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e)); // Before updating the partition params, if any partition params is null - // and if statsReliable is true then updatePartition() function will fail + // and if statsReliable is true then updatePartition() function will fail // the task by returning 1 if (work.isStatsReliable()) { partUpdates.put(tPart.getSd().getLocation(), null); @@ -244,40 +244,45 @@ public class StatsNoJobTask extends Task implements Serializable boolean statsAvailable = false; for(FileStatus file: fileList) { if (!file.isDir()) { - InputFormat inputFormat = (InputFormat) ReflectionUtil.newInstance( + InputFormat inputFormat = ReflectionUtil.newInstance( table.getInputFormatClass(), jc); - InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table - .getDataLocation().toString() }); - org.apache.hadoop.mapred.RecordReader recordReader = - inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL); - StatsProvidingRecordReader statsRR; - if (recordReader instanceof StatsProvidingRecordReader) { - statsRR = (StatsProvidingRecordReader) recordReader; - numRows += statsRR.getStats().getRowCount(); - rawDataSize += statsRR.getStats().getRawDataSize(); - fileSize += file.getLen(); + InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[]{table + .getDataLocation().toString()}); + if (file.getLen() == 0) { numFiles += 1; statsAvailable = true; + } else { + org.apache.hadoop.mapred.RecordReader recordReader = + inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL); + StatsProvidingRecordReader statsRR; + if (recordReader instanceof StatsProvidingRecordReader) { + statsRR = (StatsProvidingRecordReader) recordReader; + numRows += statsRR.getStats().getRowCount(); + rawDataSize += statsRR.getStats().getRawDataSize(); + fileSize += file.getLen(); + numFiles += 1; + statsAvailable = true; + } + recordReader.close(); } - recordReader.close(); } - } - if (statsAvailable) { - parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows)); - parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); - parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); - parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); - parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE); - - db.alterTable(tableFullName, new Table(tTable)); - - String msg = "Table " + tableFullName + " stats: [" + 
toString(parameters) + ']'; - LOG.debug(msg); - console.printInfo(msg); - } else { - String msg = "Table " + tableFullName + " does not provide stats."; - LOG.debug(msg); + if (statsAvailable) { + parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows)); + parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); + parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); + parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); + parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE); + + db.alterTable(tableFullName, new Table(tTable)); + + String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']'; + LOG.debug(msg); + console.printInfo(msg); + } else { + String msg = "Table " + tableFullName + " does not provide stats."; + LOG.debug(msg); + } } } catch (Exception e) { console.printInfo("[Warning] could not update stats for " + tableFullName + ".", http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 4093134..0a32e6c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2135,7 +2135,7 @@ public final class Utilities { taskIDToFile = removeTempOrDuplicateFiles(items, fs); // if the table is bucketed and enforce bucketing, we should check and generate all buckets - if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null) { + if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { // refresh the file list items = fs.listStatus(parts[i].getPath()); // get the missing buckets and generate empty buckets @@ -2155,8 +2155,7 @@ public final class Utilities { FileStatus[] items = fs.listStatus(path); taskIDToFile = removeTempOrDuplicateFiles(items, fs); if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null - && (conf.getTable().getNumBuckets() > taskIDToFile.size()) - && (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEENFORCEBUCKETING))) { + && (conf.getTable().getNumBuckets() > taskIDToFile.size()) && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { // get the missing buckets and generate empty buckets for non-dynamic partition String taskID1 = taskIDToFile.keySet().iterator().next(); Path bucketPath = taskIDToFile.values().iterator().next().getPath(); http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 8bcf6d7..7d1517d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -102,6 +102,7 @@ public class AcidUtils { Pattern.compile("[0-9]+_[0-9]+"); public static final PathFilter hiddenFileFilter = new PathFilter(){ + @Override public boolean accept(Path p){ String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); @@ -460,7 +461,14 @@ public class AcidUtils { return false; } - /** + public static Directory getAcidState(Path directory, + 
Configuration conf, + ValidTxnList txnList + ) throws IOException { + return getAcidState(directory, conf, txnList, false); + } + + /** * Get the ACID state of the given directory. It finds the minimal set of * base and diff directories. Note that because major compactions don't * preserve the history, we can't use a base directory that includes a @@ -473,7 +481,8 @@ public class AcidUtils { */ public static Directory getAcidState(Path directory, Configuration conf, - ValidTxnList txnList + ValidTxnList txnList, + boolean ignoreEmptyFiles ) throws IOException { FileSystem fs = directory.getFileSystem(conf); FileStatus bestBase = null; @@ -513,7 +522,7 @@ public class AcidUtils { // it is possible that the cleaner is running and removing these original files, // in which case recursing through them could cause us to get an error. originalDirectories.add(child); - } else { + } else if (!ignoreEmptyFiles || child.getLen() != 0) { original.add(child); } } @@ -590,7 +599,7 @@ public class AcidUtils { } }; } - + /** * Find the original files (non-ACID layout) recursively under the partition * directory. http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index 906eb6b..dc00e38 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -491,6 +491,14 @@ public final class OrcFile { return this; } + public FileSystem getFileSystem() { + return fileSystemValue; + } + + public Configuration getConfiguration() { + return configuration; + } + public int getBufferSize() { return bufferSizeValue; } http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 35469d1..94b5461 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -633,12 +633,14 @@ public class OrcInputFormat implements InputFormat, public List getSplits() throws IOException { List splits = Lists.newArrayList(); for (FileStatus fileStatus : fileStatuses) { - TreeMap blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus); - for (Map.Entry entry : blockOffsets.entrySet()) { - OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(), - entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true, - deltas, -1, fileStatus.getLen()); - splits.add(orcSplit); + if (fileStatus.getLen() != 0) { + TreeMap blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus); + for (Map.Entry entry : blockOffsets.entrySet()) { + OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(), + entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true, + deltas, -1, fileStatus.getLen()); + splits.add(orcSplit); + } } } @@ -710,7 +712,7 @@ public class OrcInputFormat implements InputFormat, public SplitStrategy call() throws IOException { final SplitStrategy splitStrategy; AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, - context.conf, context.transactionList); + 
context.conf, context.transactionList, true); List deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories()); Path base = dirInfo.getBaseDirectory(); List original = dirInfo.getOriginalFiles(); http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java index bc55677..7d1b994 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java @@ -93,23 +93,20 @@ public class OrcOutputFormat extends FileOutputFormat @Override public void close(boolean b) throws IOException { - // if we haven't written any rows, we need to create a file with a - // generic schema. if (writer == null) { - // a row with no columns - ObjectInspector inspector = ObjectInspectorFactory. - getStandardStructObjectInspector(new ArrayList(), - new ArrayList()); - options.inspector(inspector); - writer = OrcFile.createWriter(path, options); + // we are closing a file without writing any data in it + FileSystem fs = options.getFileSystem() == null ? + path.getFileSystem(options.getConfiguration()) : options.getFileSystem(); + fs.createNewFile(path); + return; } writer.close(); } @Override public SerDeStats getStats() { - stats.setRawDataSize(writer.getRawDataSize()); - stats.setRowCount(writer.getNumberOfRows()); + stats.setRawDataSize(null == writer ? 0 : writer.getRawDataSize()); + stats.setRowCount(null == writer ? 0 : writer.getNumberOfRows()); return stats; } } http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index e7ea70f..d99bbd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -200,7 +200,7 @@ public class CompactorMR { // and discovering that in getSplits is too late as we then have no way to pass it to our // mapper. 
- AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns); + AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, true); List parsedDeltas = dir.getCurrentDirectories(); int maxDeltastoHandle = conf.getIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA); if(parsedDeltas.size() > maxDeltastoHandle) { http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java index 1a63f99..d654b76 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java @@ -253,7 +253,7 @@ public class Initiator extends CompactorThread { boolean noBase = false; Path location = new Path(sd.getLocation()); FileSystem fs = location.getFileSystem(conf); - AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns); + AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns, false); Path base = dir.getBaseDirectory(); long baseSize = 0; FileStatus stat = null; http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 31d561b..47abc74 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -537,11 +537,11 @@ public class TestInputOutputFormat { public void testFileGenerator() throws Exception { OrcInputFormat.Context context = new OrcInputFormat.Context(conf); MockFileSystem fs = new MockFileSystem(conf, - new MockFile("mock:/a/b/part-00", 1000, new byte[0]), - new MockFile("mock:/a/b/part-01", 1000, new byte[0]), - new MockFile("mock:/a/b/_part-02", 1000, new byte[0]), - new MockFile("mock:/a/b/.part-03", 1000, new byte[0]), - new MockFile("mock:/a/b/part-04", 1000, new byte[0])); + new MockFile("mock:/a/b/part-00", 1000, new byte[1]), + new MockFile("mock:/a/b/part-01", 1000, new byte[1]), + new MockFile("mock:/a/b/_part-02", 1000, new byte[1]), + new MockFile("mock:/a/b/.part-03", 1000, new byte[1]), + new MockFile("mock:/a/b/part-04", 1000, new byte[1])); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index 4c8ddd3..87f19ab 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -1114,7 +1114,7 @@ Partition Parameters: numFiles 8 numRows 6 rawDataSize 120 - totalSize 2212 + totalSize 2016 #### A masked pattern was here #### # Storage Information @@ -1200,7 +1200,7 @@ Partition Parameters: numFiles 8 numRows 6 rawDataSize 120 - 
totalSize 2212 + totalSize 2016 #### A masked pattern was here #### # Storage Information http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 22afcbf..6967548 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -1163,10 +1163,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 + numFiles 4 numRows 6 rawDataSize 120 - totalSize 2212 + totalSize 2016 #### A masked pattern was here #### # Storage Information @@ -1249,10 +1249,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 + numFiles 4 numRows 6 rawDataSize 120 - totalSize 2212 + totalSize 2016 #### A masked pattern was here #### # Storage Information http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index 6df203a..3032b21 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -1076,7 +1076,7 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 + numFiles 4 numRows 6 rawDataSize 156 totalSize 162 @@ -1162,7 +1162,7 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 + numFiles 4 numRows 6 rawDataSize 156 totalSize 162 @@ -2435,7 +2435,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2449,15 +2448,15 @@ STAGE PLANS: predicate: (s = 'foo') (type: boolean) Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), t (type: tinyint), i (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), i (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int) + 
value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator @@ -2506,7 +2505,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2520,15 +2518,15 @@ STAGE PLANS: predicate: (t = 27) (type: boolean) Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), i (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27 (type: tinyint), i (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), 27 (type: tinyint), _col5 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), _col5 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), _col5 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator @@ -2577,7 +2575,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2591,15 +2588,15 @@ STAGE PLANS: predicate: (i = 100) (type: boolean) Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint), 100 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: tinyint), 100 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), 100 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), 100 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator @@ -2648,7 +2645,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 
(SIMPLE_EDGE) #### A masked pattern was here #### @@ -2662,15 +2658,15 @@ STAGE PLANS: predicate: ((i = 100) and (t = 27)) (type: boolean) Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27 (type: tinyint), 100 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), 27 (type: tinyint), 100 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), 100 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), 100 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator @@ -2719,7 +2715,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2733,15 +2728,15 @@ STAGE PLANS: predicate: ((i = 100) and (s = 'foo')) (type: boolean) Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), t (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col4 + expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), 100 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 'foo' (type: string), _col4 (type: tinyint), 100 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), 100 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), 100 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator @@ -2790,7 +2785,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -2804,15 +2798,15 @@ STAGE PLANS: predicate: ((t = 27) and (s = 'foo')) (type: boolean) Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: si (type: smallint), b (type: bigint), f (type: float), i (type: int) - outputColumnNames: 
_col0, _col1, _col2, _col5 + expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), 27 (type: tinyint), i (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 'foo' (type: string), 27 (type: tinyint), _col5 (type: int) + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ - Map-reduce partition columns: 'foo' (type: string), 27 (type: tinyint), _col5 (type: int) + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), 27 (type: tinyint), _col5 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) Reducer 2 Reduce Operator Tree: Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 2e09882..5f78481 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -163,7 +163,7 @@ public class Hadoop23Shims extends HadoopShimsSecure { Iterator it = result.iterator(); while (it.hasNext()) { FileStatus stat = it.next(); - if (!stat.isFile()) { + if (!stat.isFile() || (stat.getLen() == 0 && !stat.getPath().toUri().getScheme().equals("nullscan"))) { it.remove(); } }
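
----------------------------------------------------------------------
A note on the convention this patch establishes (not part of the commit itself): a bucket that
receives no rows is now materialized as a plain zero-length file instead of a schema-only ORC
file, and every consumer that enumerates bucket files (StatsNoJobTask, OrcInputFormat split
generation, AcidUtils, the Hadoop23Shims listing filter above) treats length == 0 as "no data".
A minimal sketch of both sides of that convention, using hypothetical helper names
(createEmptyBucket, hasData) that do not appear in the patch:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class EmptyBucketSketch {
      /** Writer side: what OrcOutputFormat's close() now does when no rows were written. */
      static void createEmptyBucket(Configuration conf, Path bucketPath) throws IOException {
        FileSystem fs = bucketPath.getFileSystem(conf);
        fs.createNewFile(bucketPath);   // zero bytes on disk, no ORC footer to read back
      }

      /** Reader side: the zero-length check the patch adds before opening a record reader. */
      static boolean hasData(FileStatus file) {
        return !file.isDirectory() && file.getLen() != 0;
      }
    }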
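
The reader-side opt-in is the new getAcidState overload introduced in AcidUtils above. A hedged
usage sketch follows — the class and variable names are illustrative, only the AcidUtils API is
from the patch — showing a caller that skips the empty placeholders, as OrcInputFormat's split
generation and CompactorMR do, while Initiator keeps the old behaviour by passing false:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.ValidTxnList;
    import org.apache.hadoop.hive.ql.io.AcidUtils;

    public class AcidStateSketch {
      static AcidUtils.Directory listForSplits(Path partitionDir, Configuration conf,
                                               ValidTxnList validTxns) throws IOException {
        // final argument is ignoreEmptyFiles: true skips the zero-length bucket
        // placeholders so no ORC reader is ever opened on them
        return AcidUtils.getAcidState(partitionDir, conf, validTxns, true);
      }
    }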