hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject hive git commit: HIVE-10809: HCat FileOutputCommitterContainer leaves behind empty _SCRATCH directories (Selina Zhang, reviewed by Daniel Dai)
Date Sat, 03 Sep 2016 00:38:50 GMT
Repository: hive
Updated Branches:
  refs/heads/master 769d562c7 -> 10f4db30c


HIVE-10809: HCat FileOutputCommitterContainer leaves behind empty _SCRATCH directories (Selina Zhang, reviewed by Daniel Dai)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10f4db30
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10f4db30
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10f4db30

Branch: refs/heads/master
Commit: 10f4db30ce7ced65f6131c6b3be0d73e984c88e8
Parents: 769d562
Author: Daniel Dai <daijy@hortonworks.com>
Authored: Fri Sep 2 17:39:40 2016 -0700
Committer: Daniel Dai <daijy@hortonworks.com>
Committed: Fri Sep 2 17:39:40 2016 -0700

----------------------------------------------------------------------
 .../mapreduce/FileOutputCommitterContainer.java | 13 +++++++++---
 .../hive/hcatalog/pig/TestHCatStorer.java       | 21 +++++++++++++-------
 2 files changed, 24 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/10f4db30/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 9056f11..847c3bc 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -28,6 +28,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -38,14 +39,14 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
-import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.JobConf;
@@ -887,7 +888,13 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
             moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
             moveTaskOutputs(fs,src,src,dest,false,table.isImmutable());
             if (!src.equals(dest)){
-              fs.delete(src, true);
+              if (src.toString().matches(".*" + Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+.*")){
+                // src is scratch directory, need to trim the part key value pairs from path
+                String diff = StringUtils.difference(src.toString(), dest.toString());
+                fs.delete(new Path(StringUtils.substringBefore(src.toString(), diff)), true);
+              } else {
+                fs.delete(src, true);
+              }
             }
 
             // Now, we check if the partition already exists. If not, we go ahead.

http://git-wip-us.apache.org/repos/asf/hive/blob/10f4db30/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index 86d705c..b6f8a6f 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -36,14 +36,14 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.StorageFormats;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-
 import org.apache.hive.hcatalog.HcatTestUtils;
 import org.apache.hive.hcatalog.mapreduce.HCatBaseTest;
-
 import org.apache.pig.EvalFunc;
 import org.apache.pig.ExecType;
 import org.apache.pig.PigException;
@@ -52,15 +52,12 @@ import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.util.LogUtils;
-
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
-
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -505,7 +502,7 @@ public class TestHCatStorer extends HCatBaseTest {
   }
 
   @Test
-  public void testMultiPartColsInData() throws IOException, CommandNeedRetryException {
+  public void testMultiPartColsInData() throws Exception {
     assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
 
     driver.run("drop table employee");
@@ -545,11 +542,17 @@ public class TestHCatStorer extends HCatBaseTest {
     assertEquals(inputData[1], results.get(1));
     assertEquals(inputData[2], results.get(2));
     assertEquals(inputData[3], results.get(3));
+    // verify the directories in table location
+    Path path = new Path(client.getTable("default","employee").getSd().getLocation());
+    FileSystem fs = path.getFileSystem(hiveConf);
+    assertEquals(1, fs.listStatus(path).length);
+    assertEquals(4, fs.listStatus(new Path(client.getTable("default","employee").getSd().getLocation()
+        + File.separator + "emp_country=IN")).length);
     driver.run("drop table employee");
   }
 
   @Test
-  public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException {
+  public void testStoreInPartiitonedTbl() throws Exception {
     assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
 
     driver.run("drop table junit_unparted");
@@ -582,6 +585,10 @@ public class TestHCatStorer extends HCatBaseTest {
 
     assertFalse(itr.hasNext());
     assertEquals(11, i);
+    // verify the scratch directories have been cleaned up
+    Path path = new Path(client.getTable("default","junit_unparted").getSd().getLocation());
+    FileSystem fs = path.getFileSystem(hiveConf);
+    assertEquals(1, fs.listStatus(path).length);
   }
 
   @Test


Mime
View raw message