hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sze...@apache.org
Subject [34/44] hive git commit: HIVE-10006: RSC has memory leak while execute multi queries.[Spark Branch] (Chengxiang via Rui, reviewed by Xuefu)
Date Thu, 23 Apr 2015 02:34:43 GMT
HIVE-10006: RSC has memory leak while execute multi queries.[Spark Branch] (Chengxiang via Rui, reviewed by Xuefu)

git-svn-id: https://svn.apache.org/repos/asf/hive/branches/spark@1668521 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ba5d8545
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ba5d8545
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ba5d8545

Branch: refs/heads/master
Commit: ba5d8545a2d000c905fb933fcbd6aaa1918a9baa
Parents: 3562c52
Author: Rui Li <lirui@apache.org>
Authored: Mon Mar 23 06:46:44 2015 +0000
Committer: Szehon Ho <szehon@cloudera.com>
Committed: Wed Apr 22 19:33:52 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/spark/SparkPlanGenerator.java  | 22 +++++++++++++-------
 .../hive/ql/io/CombineHiveInputFormat.java      |  4 ++++
 2 files changed, 18 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ba5d8545/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
index 716a6b6..f3b14c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
@@ -96,14 +96,20 @@ public class SparkPlanGenerator {
     workToTranMap.clear();
     workToParentWorkTranMap.clear();
 
-    for (BaseWork work : sparkWork.getAllWork()) {
-      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
-      SparkTran tran = generate(work);
-      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
-      sparkPlan.addTran(tran);
-      sparkPlan.connect(parentTran, tran);
-      workToTranMap.put(work, tran);
-      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
+    try {
+      for (BaseWork work : sparkWork.getAllWork()) {
+        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
+        SparkTran tran = generate(work);
+        SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
+        sparkPlan.addTran(tran);
+        sparkPlan.connect(parentTran, tran);
+        workToTranMap.put(work, tran);
+        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
+      }
+    } finally {
+      // clear all ThreadLocal cached MapWork/ReduceWork after plan generation
+      // as this may executed in a pool thread.
+      Utilities.clearWorkMap();
     }
 
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);

http://git-wip-us.apache.org/repos/asf/hive/blob/ba5d8545/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
index 6710c14..1de7e40 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
@@ -580,6 +580,10 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
     if (oldPaths != null) {
       job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths);
     }
+
+    // clear work from ThreadLocal after splits generated in case of thread is reused in pool.
+    Utilities.clearWorkMapForConf(job);
+
     LOG.info("Number of all splits " + result.size());
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new InputSplit[result.size()]);


Mime
View raw message