Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BBCB318AD0 for ; Wed, 14 Oct 2015 01:49:58 +0000 (UTC) Received: (qmail 20777 invoked by uid 500); 14 Oct 2015 01:49:58 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 20727 invoked by uid 500); 14 Oct 2015 01:49:58 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 20716 invoked by uid 99); 14 Oct 2015 01:49:58 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 14 Oct 2015 01:49:58 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 24D1CE0286; Wed, 14 Oct 2015 01:49:58 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: vgumashta@apache.org To: commits@hive.apache.org Message-Id: <72a0f6f696004bb4ae411bea7256d0ef@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-11499: Datanucleus leaks classloaders when used using embedded metastore with HiveServer2 with UDFs (Vaibhav Gumashta reviewed by Thejas Nair) Date: Wed, 14 Oct 2015 01:49:58 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master d7f1b465b -> 1d2e5eed3 HIVE-11499: Datanucleus leaks classloaders when used using embedded metastore with HiveServer2 with UDFs (Vaibhav Gumashta reviewed by Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1d2e5eed Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1d2e5eed Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1d2e5eed Branch: refs/heads/master Commit: 1d2e5eed3eb755bcf8a6d2b85ad0aa9a0bb7b0d9 Parents: d7f1b46 Author: Vaibhav Gumashta Authored: Tue Oct 13 18:49:30 2015 -0700 Committer: Vaibhav Gumashta Committed: Tue Oct 13 18:49:30 2015 -0700 ---------------------------------------------------------------------- data/files/identity_udf.jar | Bin 0 -> 710 bytes .../apache/hive/jdbc/TestJdbcWithMiniHS2.java | 98 +++++++++++++++++-- .../hive/metastore/HiveMetaStoreClient.java | 5 + .../hadoop/hive/metastore/IMetaStoreClient.java | 7 ++ .../hadoop/hive/metastore/ObjectStore.java | 33 +++++++ .../hadoop/hive/ql/session/SessionState.java | 18 ++++ 6 files changed, 155 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/data/files/identity_udf.jar ---------------------------------------------------------------------- diff --git a/data/files/identity_udf.jar b/data/files/identity_udf.jar new file mode 100644 index 0000000..8170995 Binary files /dev/null and b/data/files/identity_udf.jar differ http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 8ba2a12..9c8cf02 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.lang.reflect.Field; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; @@ -51,17 +52,23 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.ObjectStore; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.datanucleus.ClassLoaderResolver; +import org.datanucleus.NucleusContext; +import org.datanucleus.api.jdo.JDOPersistenceManagerFactory; import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; public class TestJdbcWithMiniHS2 { private static MiniHS2 miniHS2 = null; - private static Path dataFilePath; + private static String dataFileDir; + private static Path kvDataFilePath; private static final String tmpDir = System.getProperty("test.tmp.dir"); private Connection hs2Conn = null; @@ -72,9 +79,8 @@ public class TestJdbcWithMiniHS2 { HiveConf conf = new HiveConf(); conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); miniHS2 = new MiniHS2(conf); - String dataFileDir = conf.get("test.data.files").replace('\\', '/') - .replace("c:", ""); - dataFilePath = new Path(dataFileDir, "kv1.txt"); + dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", ""); + kvDataFilePath = new Path(dataFileDir, "kv1.txt"); Map confOverlay = new HashMap(); miniHS2.start(confOverlay); } @@ -114,7 +120,7 @@ public class TestJdbcWithMiniHS2 { // load data stmt.execute("load data local inpath '" - + dataFilePath.toString() + "' into table " + tableName); + + kvDataFilePath.toString() + "' into table " + tableName); ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); assertTrue(res.next()); @@ -135,7 +141,7 @@ public class TestJdbcWithMiniHS2 { // load data stmt.execute("load data local inpath '" - + dataFilePath.toString() + "' into table " + tableName); + + kvDataFilePath.toString() + "' into table " + tableName); ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); assertTrue(res.next()); @@ -723,4 +729,84 @@ public class TestJdbcWithMiniHS2 { fail("Not expecting exception: " + e); } } + + /** + * Tests that DataNucleus' NucleusContext.classLoaderResolverMap clears cached class objects (& hence + * doesn't leak classloaders) on closing any session + * + * @throws Exception + */ + @Test + public void testAddJarDataNucleusUnCaching() throws Exception { + Path jarFilePath = new Path(dataFileDir, "identity_udf.jar"); + Connection conn = getConnection(miniHS2.getJdbcURL(), "foo", "bar"); + String tableName = "testAddJar"; + Statement stmt = conn.createStatement(); + stmt.execute("SET hive.support.concurrency = false"); + // Create table + stmt.execute("DROP TABLE IF EXISTS " + tableName); + stmt.execute("CREATE TABLE " + tableName + " (key INT, value STRING)"); + // Load data + stmt.execute("LOAD DATA LOCAL INPATH '" + kvDataFilePath.toString() + "' INTO TABLE " + + tableName); + ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); + // Ensure table is populated + assertTrue(res.next()); + + int mapSizeBeforeClose; + int mapSizeAfterClose; + // Add the jar file + stmt.execute("ADD JAR " + jarFilePath.toString()); + // Create a temporary function using the jar + stmt.execute("CREATE TEMPORARY FUNCTION func AS 'IdentityStringUDF'"); + // Execute the UDF + stmt.execute("SELECT func(value) from " + tableName); + mapSizeBeforeClose = getNucleusClassLoaderResolverMapSize(); + System.out + .println("classLoaderResolverMap size before connection close: " + mapSizeBeforeClose); + // Cache size should be > 0 now + Assert.assertTrue(mapSizeBeforeClose > 0); + conn.close(); + mapSizeAfterClose = getNucleusClassLoaderResolverMapSize(); + System.out.println("classLoaderResolverMap size after connection close: " + mapSizeAfterClose); + // Cache size should be 0 now + Assert.assertTrue("Failed; NucleusContext classLoaderResolverMap size: " + mapSizeAfterClose, + mapSizeAfterClose == 0); + } + + @SuppressWarnings("unchecked") + private int getNucleusClassLoaderResolverMapSize() { + Field classLoaderResolverMap; + Field pmf; + JDOPersistenceManagerFactory jdoPmf = null; + NucleusContext nc = null; + Map cMap; + try { + pmf = ObjectStore.class.getDeclaredField("pmf"); + if (pmf != null) { + pmf.setAccessible(true); + jdoPmf = (JDOPersistenceManagerFactory) pmf.get(null); + if (jdoPmf != null) { + nc = jdoPmf.getNucleusContext(); + } + } + } catch (Exception e) { + System.out.println(e); + } + if (nc != null) { + try { + classLoaderResolverMap = NucleusContext.class.getDeclaredField("classLoaderResolverMap"); + if (classLoaderResolverMap != null) { + classLoaderResolverMap.setAccessible(true); + cMap = (Map) classLoaderResolverMap.get(nc); + if (cMap != null) { + return cMap.size(); + } + } + } catch (Exception e) { + System.out.println(e); + } + } + return -1; + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 8e32966..d92c284 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -302,6 +302,11 @@ public class HiveMetaStoreClient implements IMetaStoreClient { } @Override + public boolean isLocalMetaStore() { + return localMetaStore; + } + + @Override public boolean isCompatibleWith(HiveConf conf) { if (currentMetaVars == null) { return false; // recreate http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 77820ae..f3a23f5 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -104,6 +104,13 @@ public interface IMetaStoreClient { void setHiveAddedJars(String addedJars); /** + * Returns true if the current client is using an in process metastore (local metastore). + * + * @return + */ + boolean isLocalMetaStore(); + + /** * Tries to reconnect this MetaStoreClient to the MetaStore. */ void reconnect() throws MetaException; http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 31f8ccf..136eff5 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore; import static org.apache.commons.lang.StringUtils.join; import java.io.IOException; +import java.lang.reflect.Field; import java.net.InetAddress; import java.net.URI; import java.nio.ByteBuffer; @@ -154,6 +155,9 @@ import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.HiveStringUtils; import org.apache.thrift.TException; +import org.datanucleus.ClassLoaderResolver; +import org.datanucleus.NucleusContext; +import org.datanucleus.api.jdo.JDOPersistenceManagerFactory; import org.datanucleus.store.rdbms.exceptions.MissingTableException; import com.google.common.collect.Lists; @@ -7656,4 +7660,33 @@ public class ObjectStore implements RawStore, Configurable { public void putFileMetadata(List fileIds, List metadata) { // Not supported for now. } + + /** + * Removed cached classloaders from DataNucleus + * DataNucleus caches classloaders in NucleusContext. + * In UDFs, this can result in classloaders not getting GCed resulting in PermGen leaks. + * This is particularly an issue when using embedded metastore with HiveServer2, + * since the current classloader gets modified with each new add jar, + * becoming the classloader for downstream classes, which DataNucleus ends up using. + * The NucleusContext cache gets freed up only on calling a close on it. + * We're not closing NucleusContext since it does a bunch of other things which we don't want. + * We're not clearing the cache HashMap by calling HashMap#clear to avoid concurrency issues. + */ + public static void unCacheDataNucleusClassLoaders() { + PersistenceManagerFactory pmf = ObjectStore.getPMF(); + if ((pmf != null) && (pmf instanceof JDOPersistenceManagerFactory)) { + JDOPersistenceManagerFactory jdoPmf = (JDOPersistenceManagerFactory) pmf; + NucleusContext nc = jdoPmf.getNucleusContext(); + try { + Field classLoaderResolverMap = + NucleusContext.class.getDeclaredField("classLoaderResolverMap"); + classLoaderResolverMap.setAccessible(true); + classLoaderResolverMap.set(nc, new HashMap()); + LOG.debug("Removed cached classloaders from DataNucleus NucleusContext"); + } catch (Exception e) { + LOG.warn(e); + } + } + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/1d2e5eed/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 41b4bb1..92ac209 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -23,6 +23,8 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URI; import java.net.URISyntaxException; import java.net.URLClassLoader; @@ -53,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.ObjectStore; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.ql.MapRedStats; import org.apache.hadoop.hive.ql.exec.Registry; @@ -1537,6 +1540,21 @@ public class SessionState { closeSparkSession(); registry.closeCUDFLoaders(); dropSessionPaths(conf); + unCacheDataNucleusClassLoaders(); + } + + private void unCacheDataNucleusClassLoaders() { + try { + Hive threadLocalHive = Hive.get(conf); + if ((threadLocalHive != null) && (threadLocalHive.getMSC() != null) + && (threadLocalHive.getMSC().isLocalMetaStore())) { + if (conf.getVar(ConfVars.METASTORE_RAW_STORE_IMPL).equals(ObjectStore.class.getName())) { + ObjectStore.unCacheDataNucleusClassLoaders(); + } + } + } catch (Exception e) { + LOG.info(e); + } } public void closeSparkSession() {