impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mi...@apache.org
Subject [1/8] incubator-impala git commit: IMPALA-6092: avoid drop/create function interactions in e2e tests
Date Tue, 28 Nov 2017 18:41:08 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/master a86cc7a3d -> d3afe3075


IMPALA-6092: avoid drop/create function interactions in e2e tests

The e2e unit tests for udfs can interact via the backend
lib_cache, causing test flakes. IMPALA-6215 explains a
race between the lib_cache and UdfExecutor in the frontend
which is likely the root cause.
Two e2e tests use the same jar (test_java_udfs and
test_udf_invalid_symbol); test_udf_invalid_symbol drops a
function from that jar, which causes the use of that jar to
fail in the test_java_udfs test. Since the state of lib_cache
is per process, its state causes these interactions across
unit tests.
This change avoids the interactions by using separate jars for
the separate tests.

Change-Id: Ica3538788b1d2ab5e361261e2ade62780b838e65
Reviewed-on: http://gerrit.cloudera.org:8080/8593
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/628f19ed
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/628f19ed
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/628f19ed

Branch: refs/heads/master
Commit: 628f19ed0b951ba559da0c6b00bfa6d472269180
Parents: a86cc7a
Author: Vuk Ercegovac <vercegovac@cloudera.com>
Authored: Tue Nov 14 08:48:59 2017 -0800
Committer: Impala Public Jenkins <impala-public-jenkins@gerrit.cloudera.org>
Committed: Mon Nov 27 21:20:20 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/lib-cache.h                       |  4 ++--
 .../apache/impala/hive/executor/UdfExecutor.java |  2 +-
 tests/query_test/test_udfs.py                    | 19 +++++++++++++------
 3 files changed, 16 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/628f19ed/be/src/runtime/lib-cache.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/lib-cache.h b/be/src/runtime/lib-cache.h
index b492d1f..4a564ee 100644
--- a/be/src/runtime/lib-cache.h
+++ b/be/src/runtime/lib-cache.h
@@ -42,7 +42,7 @@ class RuntimeState;
 /// Locking strategy: We don't want to grab a big lock across all operations since
 /// one of the operations is copying a file from HDFS. With one lock that would
 /// prevent any UDFs from running on the system. Instead, we have a global lock
-/// that is taken when doing the cache lookup, but is not taking during any blocking calls.
+/// that is taken when doing the cache lookup, but is not taken during any blocking calls.
 /// During the block calls, we take the per-lib lock.
 //
 /// Entry lifetime management: We cannot delete the entry while a query is
@@ -140,7 +140,7 @@ class LibCache {
   /// Returns the cache entry for 'hdfs_lib_file'. If this library has not been
   /// copied locally, it will copy it and add a new LibCacheEntry to 'lib_cache_'.
   /// Result is returned in *entry.
-  /// No locks should be take before calling this. On return the entry's lock is
+  /// No locks should be taken before calling this. On return the entry's lock is
   /// taken and returned in *entry_lock.
   /// If an error is returned, there will be no entry in lib_cache_ and *entry is NULL.
   Status GetCacheEntry(const std::string& hdfs_lib_file, LibType type,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/628f19ed/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java b/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java
index df48cb7..954dfb0 100644
--- a/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java
+++ b/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java
@@ -621,7 +621,7 @@ public class UdfExecutor {
         .append(Joiner.on("\n    ").join(signatures));
       throw new ImpalaRuntimeException(sb.toString());
     } catch (MalformedURLException e) {
-      throw new ImpalaRuntimeException("Unable load jar.", e);
+      throw new ImpalaRuntimeException("Unable to load jar.", e);
     } catch (SecurityException e) {
       throw new ImpalaRuntimeException("Unable to load function.", e);
     } catch (ClassNotFoundException e) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/628f19ed/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 37d689f..a9d0974 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -404,15 +404,22 @@ class TestUdfTargeted(TestUdfBase):
   def test_udf_invalid_symbol(self, vector, unique_database):
     """ IMPALA-1642: Impala crashes if the symbol for a Hive UDF doesn't exist
         Crashing is non-deterministic so we run the UDF several times."""
+    src_udf_path = os.path.join(
+        os.environ['IMPALA_HOME'], 'testdata/udfs/impala-hive-udfs.jar')
+    tgt_udf_path = get_fs_path(
+        '/test-warehouse/{0}.db/impala-hive-udfs.jar'.format(unique_database))
     drop_fn_stmt = (
         "drop function if exists `{0}`.fn_invalid_symbol(STRING)".format(unique_database))
     create_fn_stmt = (
         "create function `{0}`.fn_invalid_symbol(STRING) returns "
         "STRING LOCATION '{1}' SYMBOL='not.a.Symbol'".format(
-            unique_database,
-            get_fs_path('/test-warehouse/impala-hive-udfs.jar')))
+            unique_database, tgt_udf_path))
     query = "select `{0}`.fn_invalid_symbol('test')".format(unique_database)
 
+    # Dropping the function can interact with other tests whose Java classes are in
+    # the same jar. Use a copy of the jar to avoid unintended interactions.
+    # See IMPALA-6215 and IMPALA-6092 for examples.
+    check_call(["hadoop", "fs", "-put", "-f", src_udf_path, tgt_udf_path])
     self.client.execute(drop_fn_stmt)
     self.client.execute(create_fn_stmt)
     for _ in xrange(5):
@@ -425,7 +432,8 @@ class TestUdfTargeted(TestUdfBase):
     """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
     on HDFS"""
     # Copy hive-exec.jar to a temporary file
-    jar_path = get_fs_path("/test-warehouse/" + get_random_id(5) + ".jar")
+    jar_path = get_fs_path("/test-warehouse/{0}.db/".format(unique_database)
+                           + get_random_id(5) + ".jar")
     hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
     check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
     drop_fn_stmt = (
@@ -474,7 +482,7 @@ class TestUdfTargeted(TestUdfBase):
     new_udf = os.path.join(
         os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
     udf_dst = get_fs_path(
-        '/test-warehouse/impala-hive-udfs2-{0}.jar'.format(unique_database))
+        '/test-warehouse/{0}.db/impala-hive-udfs.jar'.format(unique_database))
 
     drop_fn_stmt = (
         'drop function if exists `{0}`.`udf_update_test_drop`()'.format(unique_database))
@@ -507,7 +515,7 @@ class TestUdfTargeted(TestUdfBase):
     new_udf = os.path.join(
         os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
     udf_dst = get_fs_path(
-        '/test-warehouse/impala-hive-udfs3-{0}.jar'.format(unique_database))
+        '/test-warehouse/{0}.db/impala-hive-udfs.jar'.format(unique_database))
     old_function_name = "udf_update_test_create1"
     new_function_name = "udf_update_test_create2"
 
@@ -570,4 +578,3 @@ class TestUdfTargeted(TestUdfBase):
     results = self.client.fetch(query, handle, -1)
     assert results.success
     assert len(results.data) == 9999
-


Mime
View raw message