hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-17479: Staging directories do not get cleaned up for update/delete queries (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan, Eugene Koifman)
Date Fri, 15 Sep 2017 21:38:36 GMT
Repository: hive
Updated Branches:
  refs/heads/master 50fb6f3cb -> 1706a6b5d


HIVE-17479: Staging directories do not get cleaned up for update/delete queries (Jesus Camacho
Rodriguez, reviewed by Ashutosh Chauhan, Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1706a6b5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1706a6b5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1706a6b5

Branch: refs/heads/master
Commit: 1706a6b5d7f8df7cb84d0f84bc82674e11fe856d
Parents: 50fb6f3
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Wed Sep 13 11:21:43 2017 -0700
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Sep 15 14:23:30 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/Context.java   | 18 +++++++++++++++++-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java    | 18 ++++++++----------
 2 files changed, 25 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1706a6b5/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 9183edf..f04aed4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -25,9 +25,11 @@ import java.net.URI;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -101,6 +103,10 @@ public class Context {
   private TokenRewriteStream tokenRewriteStream;
 
   private final String executionId;
+  // Some statements, e.g., UPDATE, DELETE, or MERGE, get rewritten into different
+  // subqueries that create new contexts. We keep them here so we can clean them
+  // up when we are done.
+  private final Set<Context> rewrittenStatementContexts;
 
   // List of Locks for this query
   protected List<HiveLock> hiveLocks;
@@ -254,9 +260,10 @@ public class Context {
    * Create a Context with a given executionId.  ExecutionId, together with
    * user name and conf, will determine the temporary directory locations.
    */
-  public Context(Configuration conf, String executionId)  {
+  private Context(Configuration conf, String executionId)  {
     this.conf = conf;
     this.executionId = executionId;
+    this.rewrittenStatementContexts = new HashSet<>();
 
     // local & non-local tmp location is configurable. however it is the same across
     // all external file systems
@@ -662,6 +669,11 @@ public class Context {
   }
 
   public void clear() throws IOException {
+    // First clear the other contexts created by this query
+    for (Context subContext : rewrittenStatementContexts) {
+      subContext.clear();
+    }
+    // Then clear this context
     if (resDir != null) {
       try {
         FileSystem fs = resDir.getFileSystem(conf);
@@ -841,6 +853,10 @@ public class Context {
     }
   }
 
+  public void addRewrittenStatementContext(Context context) {
+    rewrittenStatementContexts.add(context);
+  }
+
   public void addCS(String path, ContentSummary cs) {
     pathToCS.put(path, cs);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/1706a6b5/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 78c511b..1bca967 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -32,18 +32,15 @@ import org.antlr.runtime.TokenRewriteStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
-import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.hooks.Entity;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
@@ -284,22 +281,23 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     }
   }
   /**
-   * Parse the newly generated SQL statment to get a new AST
+   * Parse the newly generated SQL statement to get a new AST
    */
   private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery)
throws SemanticException {
+    // Set dynamic partitioning to nonstrict so that queries do not need any partition
+    // references.
+    // todo: this may be a perf issue as it prevents the optimizer.. or not
+    HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
     // Parse the rewritten query string
     Context rewrittenCtx;
     try {
-      // Set dynamic partitioning to nonstrict so that queries do not need any partition
-      // references.
-      // todo: this may be a perf issue as it prevents the optimizer.. or not
-      HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
       rewrittenCtx = new Context(conf);
-      rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
-      rewrittenCtx.setIsUpdateDeleteMerge(true);
+      ctx.addRewrittenStatementContext(rewrittenCtx);
     } catch (IOException e) {
       throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
     }
+    rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
+    rewrittenCtx.setIsUpdateDeleteMerge(true);
     rewrittenCtx.setCmd(rewrittenQueryStr.toString());
 
     ASTNode rewrittenTree;


Mime
View raw message