hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vgumas...@apache.org
Subject hive git commit: HIVE-11499: Datanucleus leaks classloaders when used using embedded metastore with HiveServer2 with UDFs (Vaibhav Gumashta reviewed by Thejas Nair)
Date Wed, 14 Oct 2015 01:49:58 GMT
Repository: hive
Updated Branches:
  refs/heads/master d7f1b465b -> 1d2e5eed3


HIVE-11499: Datanucleus leaks classloaders when used using embedded metastore with HiveServer2
with UDFs (Vaibhav Gumashta reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1d2e5eed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1d2e5eed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1d2e5eed

Branch: refs/heads/master
Commit: 1d2e5eed3eb755bcf8a6d2b85ad0aa9a0bb7b0d9
Parents: d7f1b46
Author: Vaibhav Gumashta <vgumashta@apache.org>
Authored: Tue Oct 13 18:49:30 2015 -0700
Committer: Vaibhav Gumashta <vgumashta@apache.org>
Committed: Tue Oct 13 18:49:30 2015 -0700

----------------------------------------------------------------------
 data/files/identity_udf.jar                     | Bin 0 -> 710 bytes
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   |  98 +++++++++++++++++--
 .../hive/metastore/HiveMetaStoreClient.java     |   5 +
 .../hadoop/hive/metastore/IMetaStoreClient.java |   7 ++
 .../hadoop/hive/metastore/ObjectStore.java      |  33 +++++++
 .../hadoop/hive/ql/session/SessionState.java    |  18 ++++
 6 files changed, 155 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/data/files/identity_udf.jar
----------------------------------------------------------------------
diff --git a/data/files/identity_udf.jar b/data/files/identity_udf.jar
new file mode 100644
index 0000000..8170995
Binary files /dev/null and b/data/files/identity_udf.jar differ

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index 8ba2a12..9c8cf02 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.lang.reflect.Field;
 import java.sql.Connection;
 import java.sql.DriverManager;
 import java.sql.ResultSet;
@@ -51,17 +52,23 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.ObjectStore;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hive.jdbc.miniHS2.MiniHS2;
+import org.datanucleus.ClassLoaderResolver;
+import org.datanucleus.NucleusContext;
+import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
 import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestJdbcWithMiniHS2 {
   private static MiniHS2 miniHS2 = null;
-  private static Path dataFilePath;
+  private static String dataFileDir;
+  private static Path kvDataFilePath;
   private static final String tmpDir = System.getProperty("test.tmp.dir");
 
   private Connection hs2Conn = null;
@@ -72,9 +79,8 @@ public class TestJdbcWithMiniHS2 {
     HiveConf conf = new HiveConf();
     conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
     miniHS2 = new MiniHS2(conf);
-    String dataFileDir = conf.get("test.data.files").replace('\\', '/')
-        .replace("c:", "");
-    dataFilePath = new Path(dataFileDir, "kv1.txt");
+    dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", "");
+    kvDataFilePath = new Path(dataFileDir, "kv1.txt");
     Map<String, String> confOverlay = new HashMap<String, String>();
     miniHS2.start(confOverlay);
   }
@@ -114,7 +120,7 @@ public class TestJdbcWithMiniHS2 {
 
     // load data
     stmt.execute("load data local inpath '"
-        + dataFilePath.toString() + "' into table " + tableName);
+        + kvDataFilePath.toString() + "' into table " + tableName);
 
     ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
     assertTrue(res.next());
@@ -135,7 +141,7 @@ public class TestJdbcWithMiniHS2 {
 
     // load data
     stmt.execute("load data local inpath '"
-        + dataFilePath.toString() + "' into table " + tableName);
+        + kvDataFilePath.toString() + "' into table " + tableName);
 
     ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
     assertTrue(res.next());
@@ -723,4 +729,84 @@ public class TestJdbcWithMiniHS2 {
       fail("Not expecting exception: " + e);
     }
   }
+
+  /**
+   * Tests that DataNucleus' NucleusContext.classLoaderResolverMap clears cached class objects (& hence
+   * doesn't leak classloaders) on closing any session
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testAddJarDataNucleusUnCaching() throws Exception {
+    Path jarFilePath = new Path(dataFileDir, "identity_udf.jar");
+    Connection conn = getConnection(miniHS2.getJdbcURL(), "foo", "bar");
+    String tableName = "testAddJar";
+    Statement stmt = conn.createStatement();
+    stmt.execute("SET hive.support.concurrency = false");
+    // Create table
+    stmt.execute("DROP TABLE IF EXISTS " + tableName);
+    stmt.execute("CREATE TABLE " + tableName + " (key INT, value STRING)");
+    // Load data
+    stmt.execute("LOAD DATA LOCAL INPATH '" + kvDataFilePath.toString() + "' INTO TABLE "
+        + tableName);
+    ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
+    // Ensure table is populated
+    assertTrue(res.next());
+
+    int mapSizeBeforeClose;
+    int mapSizeAfterClose;
+    // Add the jar file
+    stmt.execute("ADD JAR " + jarFilePath.toString());
+    // Create a temporary function using the jar
+    stmt.execute("CREATE TEMPORARY FUNCTION func AS 'IdentityStringUDF'");
+    // Execute the UDF
+    stmt.execute("SELECT func(value) from " + tableName);
+    mapSizeBeforeClose = getNucleusClassLoaderResolverMapSize();
+    System.out
+        .println("classLoaderResolverMap size before connection close: " + mapSizeBeforeClose);
+    // Cache size should be > 0 now
+    Assert.assertTrue(mapSizeBeforeClose > 0);
+    conn.close();
+    mapSizeAfterClose = getNucleusClassLoaderResolverMapSize();
+    System.out.println("classLoaderResolverMap size after connection close: " + mapSizeAfterClose);
+    // Cache size should be 0 now
+    Assert.assertTrue("Failed; NucleusContext classLoaderResolverMap size: " + mapSizeAfterClose,
+        mapSizeAfterClose == 0);
+  }
+
+  @SuppressWarnings("unchecked")
+  private int getNucleusClassLoaderResolverMapSize() {
+    Field classLoaderResolverMap;
+    Field pmf;
+    JDOPersistenceManagerFactory jdoPmf = null;
+    NucleusContext nc = null;
+    Map<String, ClassLoaderResolver> cMap;
+    try {
+      pmf = ObjectStore.class.getDeclaredField("pmf");
+      if (pmf != null) {
+        pmf.setAccessible(true);
+        jdoPmf = (JDOPersistenceManagerFactory) pmf.get(null);
+        if (jdoPmf != null) {
+          nc = jdoPmf.getNucleusContext();
+        }
+      }
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    if (nc != null) {
+      try {
+        classLoaderResolverMap = NucleusContext.class.getDeclaredField("classLoaderResolverMap");
+        if (classLoaderResolverMap != null) {
+          classLoaderResolverMap.setAccessible(true);
+          cMap = (Map<String, ClassLoaderResolver>) classLoaderResolverMap.get(nc);
+          if (cMap != null) {
+            return cMap.size();
+          }
+        }
+      } catch (Exception e) {
+        System.out.println(e);
+      }
+    }
+    return -1;
+  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 8e32966..d92c284 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -302,6 +302,11 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
   }
 
   @Override
+  public boolean isLocalMetaStore() {
+    return localMetaStore;
+  }
+
+  @Override
   public boolean isCompatibleWith(HiveConf conf) {
     if (currentMetaVars == null) {
       return false; // recreate

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
index 77820ae..f3a23f5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
@@ -104,6 +104,13 @@ public interface IMetaStoreClient {
   void setHiveAddedJars(String addedJars);
 
   /**
+   * Returns true if the current client is using an in process metastore (local metastore).
+   *
+   * @return
+   */
+  boolean isLocalMetaStore();
+
+  /**
    *  Tries to reconnect this MetaStoreClient to the MetaStore.
    */
   void reconnect() throws MetaException;

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 31f8ccf..136eff5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore;
 import static org.apache.commons.lang.StringUtils.join;
 
 import java.io.IOException;
+import java.lang.reflect.Field;
 import java.net.InetAddress;
 import java.net.URI;
 import java.nio.ByteBuffer;
@@ -154,6 +155,9 @@ import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.common.util.HiveStringUtils;
 import org.apache.thrift.TException;
+import org.datanucleus.ClassLoaderResolver;
+import org.datanucleus.NucleusContext;
+import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
 import org.datanucleus.store.rdbms.exceptions.MissingTableException;
 
 import com.google.common.collect.Lists;
@@ -7656,4 +7660,33 @@ public class ObjectStore implements RawStore, Configurable {
  public void putFileMetadata(List<Long> fileIds, List<ByteBuffer> metadata) {
     // Not supported for now.
   }
+
+  /**
+   * Removed cached classloaders from DataNucleus
+   * DataNucleus caches classloaders in NucleusContext.
+   * In UDFs, this can result in classloaders not getting GCed resulting in PermGen leaks.
+   * This is particularly an issue when using embedded metastore with HiveServer2,
+   * since the current classloader gets modified with each new add jar,
+   * becoming the classloader for downstream classes, which DataNucleus ends up using.
+   * The NucleusContext cache gets freed up only on calling a close on it.
+   * We're not closing NucleusContext since it does a bunch of other things which we don't want.
+   * We're not clearing the cache HashMap by calling HashMap#clear to avoid concurrency issues.
+   */
+  public static void unCacheDataNucleusClassLoaders() {
+    PersistenceManagerFactory pmf = ObjectStore.getPMF();
+    if ((pmf != null) && (pmf instanceof JDOPersistenceManagerFactory)) {
+      JDOPersistenceManagerFactory jdoPmf = (JDOPersistenceManagerFactory) pmf;
+      NucleusContext nc = jdoPmf.getNucleusContext();
+      try {
+        Field classLoaderResolverMap =
+            NucleusContext.class.getDeclaredField("classLoaderResolverMap");
+        classLoaderResolverMap.setAccessible(true);
+        classLoaderResolverMap.set(nc, new HashMap<String, ClassLoaderResolver>());
+        LOG.debug("Removed cached classloaders from DataNucleus NucleusContext");
+      } catch (Exception e) {
+        LOG.warn(e);
+      }
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 41b4bb1..92ac209 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -23,6 +23,8 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URLClassLoader;
@@ -53,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.ObjectStore;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.ql.MapRedStats;
 import org.apache.hadoop.hive.ql.exec.Registry;
@@ -1537,6 +1540,21 @@ public class SessionState {
     closeSparkSession();
     registry.closeCUDFLoaders();
     dropSessionPaths(conf);
+    unCacheDataNucleusClassLoaders();
+  }
+
+  private void unCacheDataNucleusClassLoaders() {
+    try {
+      Hive threadLocalHive = Hive.get(conf);
+      if ((threadLocalHive != null) && (threadLocalHive.getMSC() != null)
+          && (threadLocalHive.getMSC().isLocalMetaStore())) {
+        if (conf.getVar(ConfVars.METASTORE_RAW_STORE_IMPL).equals(ObjectStore.class.getName())) {
+          ObjectStore.unCacheDataNucleusClassLoaders();
+        }
+      }
+    } catch (Exception e) {
+      LOG.info(e);
+    }
   }
 
   public void closeSparkSession() {


Mime
View raw message