impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
Subject [13/50] [abbrv] incubator-impala git commit: IMPALA-4595: Ignore discarded functions after linking
Date Thu, 15 Dec 2016 23:00:39 GMT
IMPALA-4595: Ignore discarded functions after linking

For an LLVM IR UDF, Impalad links the external LLVM module
in which the IR UDF is defined into the main module. If a
symbol happens to be defined in both modules, LLVM may
choose to discard the one defined in the external module.
The discarded function and its callees will then not be
present in the linked module.

In IMPALA-4595, the external IR UDF module was compiled without any
optimization. Duplicated definitions such as StringVal::null()
may have different inlining levels between the external module
and the main module. When the duplicated definition in
the external module is discarded, some of its callee
functions (which are not inlined) may not be defined in the
main module, so they can no longer be located in the linked
module. This trips up some code in LlvmCodeGen::LinkModule().
In particular, when parsing for functions in external module
which are materialized during linking, certain functions may
not be present due to the reason above. Impalad will hit
a DCHECK in debug build or crash due to null pointer access
in release build.

This change fixes the problem above by taking into account
that certain functions may not be defined anymore after linking.
This change also fixes two incorrect status propagations in
the be/src/service/ code (FeSupport's NativeEvalConstExprs()).
Change-Id: Iaa056a0c888bfcc95b412e1bc1063bb607b58ab7
Reviewed-by: Michael Ho <>
Tested-by: Impala Public Jenkins


Branch: refs/heads/hadoop-next
Commit: 9337518137ddbbdefde9738ea1dce9f978f525a8
Parents: 9baf4f0
Author: Michael Ho <>
Authored: Mon Dec 5 21:18:38 2016 -0800
Committer: Impala Public Jenkins <>
Committed: Wed Dec 7 22:52:35 2016 +0000

 be/src/codegen/                  | 17 ++++---
 be/src/service/                    |  5 +-
 be/src/testutil/                    | 53 +++++++++++++++++++-
 .../functional-query/queries/QueryTest/udf.test |  7 +++
 tests/query_test/                   |  5 ++
 5 files changed, 76 insertions(+), 11 deletions(-)
diff --git a/be/src/codegen/ b/be/src/codegen/
index b8b5db6..b856fb2 100644
--- a/be/src/codegen/
+++ b/be/src/codegen/
@@ -354,19 +354,20 @@ Status LlvmCodeGen::LinkModule(const string& file) {
   for (const string& fn_name: ref_fns) {
     Function* fn = module_->getFunction(fn_name);
-    DCHECK(fn != NULL);
-    if (fn->isMaterializable()) {
-      MaterializeFunction(fn);
+    // The global variable from source module which references 'fn' can have private
+    // linkage and it may not be linked into 'module_'.
+    if (fn != NULL && fn->isMaterializable()) {
+      RETURN_IF_ERROR(MaterializeFunction(fn));
-  // Parse materialized functions in the source module and materialize functions it
-  // references. Do it after linking so LLVM has "merged" functions defined in both
-  // modules.
+  // Parse functions in the source module materialized during linking and materialize
+  // their callees. Do it after linking so LLVM has "merged" functions defined in both
+  // modules. LLVM may not link in functions (and their callees) from source module if
+  // they're defined in destination module already.
   for (const string& fn_name: materializable_fns) {
     Function* fn = module_->getFunction(fn_name);
-    DCHECK(fn != NULL);
-    if (!fn->isMaterializable()) MaterializeCallees(fn);
+    if (fn != NULL && !fn->isMaterializable()) RETURN_IF_ERROR(MaterializeCallees(fn));
   return Status::OK();
diff --git a/be/src/service/ b/be/src/service/
index 9483ccc..b4e2906 100644
--- a/be/src/service/
+++ b/be/src/service/
@@ -131,9 +131,10 @@ Java_org_apache_impala_service_FeSupport_NativeEvalConstExprs(
     if (!status.ok()) goto error;
     LlvmCodeGen* codegen = state.codegen();
     DCHECK(codegen != NULL);
-    state.CodegenScalarFns();
+    status = state.CodegenScalarFns();
+    if (!status.ok()) goto error;
-    Status status = codegen->FinalizeModule();
+    status = codegen->FinalizeModule();
     if (!status.ok()) goto error;
diff --git a/be/src/testutil/ b/be/src/testutil/
index 56d4524..d9425d2 100644
--- a/be/src/testutil/
+++ b/be/src/testutil/
@@ -16,9 +16,13 @@
 // under the License.
 #include <udf/udf.h>
+#include <cmath>
 using namespace impala_udf;
+#define NO_INLINE __attribute__((noinline))
+#define WEAK_SYM  __attribute__((weak))
 // These functions are intended to test the "glue" that runs UDFs. Thus, the UDFs
 // themselves are kept very simple.
@@ -122,7 +126,7 @@ DecimalVal VarSum(FunctionContext* context, int n, const DecimalVal* args)
   return DecimalVal(result);
-DoubleVal __attribute__((noinline)) VarSumMultiply(FunctionContext* context,
+DoubleVal NO_INLINE VarSumMultiply(FunctionContext* context,
     const DoubleVal& d, int n, const IntVal* args) {
   if (d.is_null) return DoubleVal::null();
@@ -149,11 +153,58 @@ extern "C" StringVal
         FunctionContext* context, const StringVal& str);
 StringVal ToLower(FunctionContext* context, const StringVal& str) {
+  // StringVal::null() doesn't inline its callee when compiled without optimization.
+  // Useful for testing cases such as IMPALA-4595.
+  if (str.is_null) return StringVal::null();
           context, str);
+typedef DoubleVal (*TestFn)(const DoubleVal& base, const DoubleVal& exp);
+// This function is dropped upon linking when tested as IR UDF as it has internal linkage
+// and its only caller Pow() will be overriden upon linking.
+static DoubleVal NO_INLINE PrivateFn1(const DoubleVal& base, const DoubleVal& exp)
+#ifdef IR_COMPILE
+  return DoubleVal::null();
+  return DoubleVal(std::pow(base.val, exp.val));
+// This function is referenced in global variable 'global_array_2' even though it
+// has no caller. This is to exercise IMPALA-4595 which verifies that this function
+// still exists after linking.
+static DoubleVal PrivateFn2(const DoubleVal& base, const DoubleVal& exp) {
+  return DoubleVal(base.val + exp.val);
+// This is a constant array with internal linkage type. Its only reference is from Pow()
+// which will be overridden during linking. This array will essentially not be in the
+// module after linking. Used to exercise IMPALA-4595 when testing IR UDF.
+static volatile const TestFn global_array[1] = {PrivateFn1};
+volatile const TestFn global_array_2[1] = {PrivateFn2};
+namespace impala {
+  class MathFunctions {
+    static DoubleVal Pow(FunctionContext* ctx, const DoubleVal& base,
+        const DoubleVal& exp);
+  };
+// This function has the same signature as a built-in function (pow()) in Impalad.
+// It has a weak linkage type so it can be overridden at linking when tested as IR UDF.
+DoubleVal WEAK_SYM impala::MathFunctions::Pow(FunctionContext* context,
+    const DoubleVal& base, const DoubleVal& exp) {
+  // Just references 'global_array' to stop the compiler from complaining.
+  // This function will be overridden after linking so 'global_array' is dead
+  // when tested as an IR UDF.
+  if (base.is_null || exp.is_null || global_array[0] == NULL) return DoubleVal::null();
+  return PrivateFn1(base, exp);
 BooleanVal TestError(FunctionContext* context) {
   context->SetError("test UDF error");
   context->SetError("this shouldn't show up");
diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf.test b/testdata/workloads/functional-query/queries/QueryTest/udf.test
index b3f8dfa..8da52c1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/udf.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/udf.test
@@ -538,3 +538,10 @@ BIGINT
+---- QUERY
+select pow(3,2), xpow(3,2);
+---- TYPES
\ No newline at end of file
diff --git a/tests/query_test/ b/tests/query_test/
index d5a7cba..274ebef 100644
--- a/tests/query_test/
+++ b/tests/query_test/
@@ -369,6 +369,7 @@ drop function if exists {database}.var_sum(string...);
 drop function if exists {database}.var_sum(decimal(4,2)...);
 drop function if exists {database}.var_sum_multiply(double, int...);
 drop function if exists {database}.var_sum_multiply2(double, int...);
+drop function if exists {database}.xpow(double, double);
 drop function if exists {database}.to_lower(string);
 drop function if exists {database}.constant_timestamp();
 drop function if exists {database}.validate_arg_type(string);
@@ -464,6 +465,10 @@ create function {database}.var_sum_multiply2(double, int...) returns
 location '{location}'
+create function {database}.xpow(double, double) returns double
+location '{location}'
 create function {database}.to_lower(string) returns string
 location '{location}'

View raw message