From: spena@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Date: Wed, 28 Sep 2016 21:18:10 -0000
Subject: [4/4] hive git commit: HIVE-14137: Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables (Sahil Takiar, reviewed by Sergio Pena)

HIVE-14137: Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables (Sahil Takiar, reviewed by Sergio Pena)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/878b0e7d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/878b0e7d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/878b0e7d

Branch: refs/heads/branch-2.1
Commit: 878b0e7de1743059ea70807858a8ce01c360cb18
Parents: a8e101b
Author: Sergio Pena
Authored: Wed Sep 28 16:08:40 2016 -0500
Committer: Sergio Pena
Committed: Wed Sep 28 16:08:40 2016 -0500

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 18 +++--
 .../hadoop/hive/ql/exec/TestUtilities.java      | 75 ++++++++++++++++++++
 2 files changed, 83 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/878b0e7d/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 8f7bbb2..4556301 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2944,7 +2944,6 @@ public final class Utilities {
    */
   public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir,
       Context ctx, boolean skipDummy) throws Exception {
-    int sequenceNumber = 0;
 
     Set<Path> pathsProcessed = new HashSet<Path>();
     List<Path> pathsToAdd = new LinkedList<Path>();
@@ -2971,7 +2970,7 @@ public final class Utilities {
           if (!skipDummy && isEmptyPath(job, path, ctx)) {
             path = createDummyFileForEmptyPartition(path, job, work,
-                hiveScratchDir, alias, sequenceNumber++);
+                hiveScratchDir);
           }
 
           pathsToAdd.add(path);
@@ -2987,8 +2986,7 @@ public final class Utilities {
       // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2
       // rows)
       if (path == null && !skipDummy) {
-        path = createDummyFileForEmptyTable(job, work, hiveScratchDir,
-            alias, sequenceNumber++);
+        path = createDummyFileForEmptyTable(job, work, hiveScratchDir, alias);
         pathsToAdd.add(path);
       }
     }
@@ -2998,11 +2996,11 @@ public final class Utilities {
   @SuppressWarnings({"rawtypes", "unchecked"})
   private static Path createEmptyFile(Path hiveScratchDir, HiveOutputFormat outFileFormat,
       JobConf job,
-      int sequenceNumber, Properties props, boolean dummyRow)
+      Properties props, boolean dummyRow)
       throws IOException, InstantiationException, IllegalAccessException {
 
     // create a dummy empty file in a new directory
-    String newDir = hiveScratchDir + Path.SEPARATOR + sequenceNumber;
+    String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString();
     Path newPath = new Path(newDir);
     FileSystem fs = newPath.getFileSystem(job);
     fs.mkdirs(newPath);
@@ -3028,7 +3026,7 @@ public final class Utilities {
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work,
-      Path hiveScratchDir, String alias, int sequenceNumber)
+      Path hiveScratchDir)
       throws Exception {
 
     String strPath = path.toString();
@@ -3047,7 +3045,7 @@ public final class Utilities {
     boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;
 
     Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job,
-        sequenceNumber, props, oneRow);
+        props, oneRow);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Changed input file " + strPath + " to empty file " + newPath);
@@ -3072,7 +3070,7 @@ public final class Utilities {
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
-      Path hiveScratchDir, String alias, int sequenceNumber)
+      Path hiveScratchDir, String alias)
       throws Exception {
 
     TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
@@ -3085,7 +3083,7 @@ public final class Utilities {
     HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc);
 
     Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job,
-        sequenceNumber, props, false);
+        props, false);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Changed input file for alias " + alias + " to " + newPath);

http://git-wip-us.apache.org/repos/asf/hive/blob/878b0e7d/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index d2060a1..adde59f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 import static org.apache.hadoop.hive.ql.exec.Utilities.getFileExtension;
+import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -30,13 +31,19 @@ import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.LinkedHashMap;
+import java.util.Properties;
+import java.util.UUID;
 
 import org.apache.commons.io.FileUtils;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
@@ -44,17 +51,25 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.JobConf;
+
 import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.common.io.Files;
@@ -246,4 +261,64 @@ public class TestUtilities {
     FileSystem.getLocal(hconf).create(taskOutputPath).close();
     return tempDirPath;
   }
+
+  /**
+   * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
+   * can process two different empty tables without throwing any exceptions.
+   */
+  @Test
+  public void testGetInputPathsWithEmptyTables() throws Exception {
+    String alias1Name = "alias1";
+    String alias2Name = "alias2";
+
+    MapWork mapWork1 = new MapWork();
+    MapWork mapWork2 = new MapWork();
+    JobConf jobConf = new JobConf();
+
+    String nonExistentPath1 = UUID.randomUUID().toString();
+    String nonExistentPath2 = UUID.randomUUID().toString();
+
+    PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
+    TableDesc mockTableDesc = mock(TableDesc.class);
+
+    when(mockTableDesc.isNonNative()).thenReturn(false);
+    when(mockTableDesc.getProperties()).thenReturn(new Properties());
+
+    when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
+    when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
+    doReturn(HiveSequenceFileOutputFormat.class).when(
+        mockPartitionDesc).getOutputFileFormatClass();
+
+    mapWork1.setPathToAliases(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
+    mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
+        ImmutableMap.of(alias1Name, (Operator) mock(Operator.class))));
+    mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
+
+    mapWork2.setPathToAliases(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
+    mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
+        ImmutableMap.of(alias2Name, (Operator) mock(Operator.class))));
+    mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(
+        ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
+
+    List<Path> inputPaths = new ArrayList<>();
+    try {
+      Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
+      inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir,
+          mock(Context.class), false));
+      inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir,
+          mock(Context.class), false));
+      assertEquals(inputPaths.size(), 2);
+    } finally {
+      File file;
+      for (Path path : inputPaths) {
+        file = new File(path.toString());
+        if (file.exists()) {
+          file.delete();
+        }
+      }
+    }
+  }
 }
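----------------------------------------------------------------------
Note on the fix (not part of the commit): before this change, getInputPaths() named each dummy scratch
subdirectory with a counter that was local to the call and restarted at 0, so a job whose plan contained
more than one MapWork with an empty table asked for <hiveScratchDir>/0 twice, and the second attempt
failed with FileAlreadyExistsException. Switching to UUID.randomUUID() makes every dummy directory name
unique without any coordination between calls. The sketch below illustrates the difference with plain
java.nio; DummyDirNamingSketch, createWithCounter and createWithUuid are illustrative names, not Hive
APIs.

import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.UUID;

/** Illustrative sketch: contrasts a per-call counter with UUID-based naming
 *  when two independent calls share one scratch directory. */
public class DummyDirNamingSketch {

  /** Old scheme: the counter is local, so every call asks for the same
   *  first directory name and the second call collides. */
  static Path createWithCounter(Path scratchDir) throws IOException {
    int sequenceNumber = 0;                              // reset to 0 on each call
    Path dir = scratchDir.resolve(Integer.toString(sequenceNumber));
    Files.createDirectory(dir);                          // throws if <scratchDir>/0 already exists
    return Files.createFile(dir.resolve("emptyFile"));
  }

  /** New scheme: a random UUID keeps each dummy directory unique no matter
   *  how many calls reuse the same scratch directory. */
  static Path createWithUuid(Path scratchDir) throws IOException {
    Path dir = scratchDir.resolve(UUID.randomUUID().toString());
    Files.createDirectory(dir);
    return Files.createFile(dir.resolve("emptyFile"));
  }

  public static void main(String[] args) throws IOException {
    Path scratch = Files.createTempDirectory("scratch");

    // UUID naming: two "empty table" placeholders coexist.
    System.out.println(createWithUuid(scratch));
    System.out.println(createWithUuid(scratch));

    // Counter naming: the second invocation collides on <scratch>/0.
    createWithCounter(scratch);
    try {
      createWithCounter(scratch);
    } catch (FileAlreadyExistsException e) {
      System.out.println("collision, as in HIVE-14137: " + e.getMessage());
    }
  }
}

The trade-off is that the scratch subdirectory names are no longer predictable, which is why the new
test only asserts that two MapWork objects with empty tables yield two distinct input paths and then
deletes whatever paths were created.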