hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gunt...@apache.org
Subject svn commit: r1659760 - in /hive/branches/llap/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/queries/clientpositive/ test/results/clientpositive/tez/
Date Sat, 14 Feb 2015 08:35:07 GMT
Author: gunther
Date: Sat Feb 14 08:35:06 2015
New Revision: 1659760

URL: http://svn.apache.org/r1659760
Log:
HIVE-9694: LLAP: add check for udfs/udafs to llapdecider (Gunther Hagleitner)

Modified:
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
    hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q
    hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1659760&r1=1659759&r2=1659760&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sat Feb 14 08:35:06 2015
@@ -1654,18 +1654,6 @@ public final class FunctionRegistry {
   }
 
   /**
-   * Get the UDF class from an exprNodeDesc. Returns null if the exprNodeDesc
-   * does not contain a UDF class.
-   */
-  private static Class<? extends GenericUDF> getUDFClassFromExprDesc(ExprNodeDesc desc) {
-    if (!(desc instanceof ExprNodeGenericFuncDesc)) {
-      return null;
-    }
-    ExprNodeGenericFuncDesc genericFuncDesc = (ExprNodeGenericFuncDesc) desc;
-    return genericFuncDesc.getGenericUDF().getClass();
-  }
-
-  /**
    * Returns whether a GenericUDF is deterministic or not.
    */
   public static boolean isDeterministic(GenericUDF genericUDF) {
@@ -1755,7 +1743,7 @@ public final class FunctionRegistry {
    * Returns whether the exprNodeDesc is a node of "positive".
    */
   public static boolean isOpPositive(ExprNodeDesc desc) {
-    Class<? extends GenericUDF> udfClass = getUDFClassFromExprDesc(desc);
+    Class<? extends GenericUDF> udfClass = getGenericUDFClassFromExprDesc(desc);
     return GenericUDFOPPositive.class == udfClass;
   }
 
@@ -2040,10 +2028,15 @@ public final class FunctionRegistry {
    * @return True iff the fnExpr represents a hive built-in function.
    */
   public static boolean isNativeFuncExpr(ExprNodeGenericFuncDesc fnExpr) {
-    Class<?> udfClass = getUDFClassFromExprDesc(fnExpr);
-    if (udfClass == null) {
-      udfClass = getGenericUDFClassFromExprDesc(fnExpr);
+    Class<?> udfClass = null;
+
+    GenericUDF udf = fnExpr.getGenericUDF();
+    if (udf instanceof GenericUDFBridge) {
+      udfClass = ((GenericUDFBridge) udf).getUdfClass();
+    } else {
+      udfClass = udf.getClass();
     }
+
     return nativeUdfs.contains(udfClass);
   }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java?rev=1659760&r1=1659759&r2=1659760&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java Sat Feb 14 08:35:06 2015
@@ -18,11 +18,19 @@
 
 package org.apache.hadoop.hive.ql.optimizer.physical;
 
+import static org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.all;
+import static org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.auto;
+import static org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.map;
+import static org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.none;
+
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Deque;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Stack;
@@ -30,8 +38,13 @@ import java.util.Stack;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
@@ -46,13 +59,16 @@ import org.apache.hadoop.hive.ql.lib.Rul
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.plan.TezWork;
 
-import static org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.*;
+import com.google.common.base.Joiner;
 
 /**
  * LlapDecider takes care of tagging certain vertices in the execution
@@ -133,7 +149,7 @@ public class LlapDecider implements Phys
       // first we check if we *can* run in llap. If we need to use
       // user code to do so (script/udf) we don't.
       if (!evaluateOperators(work)) {
-	LOG.info("some operators cannot be run in llap");
+        LOG.info("some operators cannot be run in llap");
         return false;
       }
 
@@ -195,15 +211,109 @@ public class LlapDecider implements Phys
       return true;
     }
 
+    private boolean checkExpression(ExprNodeDesc expr) {
+      Deque<ExprNodeDesc> exprs = new LinkedList<ExprNodeDesc>();
+      exprs.add(expr);
+      while (!exprs.isEmpty()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(String.format("Checking '%s'",expr.getExprString()));
+        }
+
+        ExprNodeDesc cur = exprs.removeFirst();
+        if (cur == null) continue;
+        if (cur.getChildren() != null) {
+	  exprs.addAll(cur.getChildren());
+	}
+
+        if (cur instanceof ExprNodeGenericFuncDesc) {
+	  // getRequiredJars is currently broken (requires init in some cases before you can call it)
+          // String[] jars = ((ExprNodeGenericFuncDesc)cur).getGenericUDF().getRequiredJars();
+          // if (jars != null && !(jars.length == 0)) {
+          //   LOG.info(String.format("%s requires %s", cur.getExprString(), Joiner.on(", ").join(jars)));
+          //   return false;
+          // }
+
+          if (!FunctionRegistry.isNativeFuncExpr((ExprNodeGenericFuncDesc)cur)) {
+            LOG.info("Not a built-in function: " + cur.getExprString());
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    private boolean checkAggregator(AggregationDesc agg) throws SemanticException {
+      if (LOG.isDebugEnabled()) {
+	LOG.debug(String.format("Checking '%s'", agg.getExprString()));
+      }
+
+      boolean result = checkExpressions(agg.getParameters());
+      FunctionInfo fi = FunctionRegistry.getFunctionInfo(agg.getGenericUDAFName());
+      result = result && (fi != null) && fi.isNative();
+      if (!result) {
+        LOG.info("Aggregator is not native: " + agg.getExprString());
+      }
+      return result;
+    }
+
+    private boolean checkExpressions(Collection<ExprNodeDesc> exprs) {
+      boolean result = true;
+      for (ExprNodeDesc expr: exprs) {
+        result = result && checkExpression(expr);
+      }
+      return result;
+    }
+
+    private boolean checkAggregators(Collection<AggregationDesc> aggs) {
+      boolean result = true;
+      try {
+	for (AggregationDesc agg: aggs) {
+	  result = result && checkAggregator(agg);
+	}
+      } catch (SemanticException e) {
+	LOG.warn("Exception testing aggregators.",e);
+	result = false;
+      }
+      return result;
+    }
+
     private Map<Rule, NodeProcessor> getRules() {
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-      opRules.put(new RuleRegExp("No scripts", ScriptOperator.getOperatorName() + ".*"),
+      opRules.put(new RuleRegExp("No scripts", ScriptOperator.getOperatorName() + "%"),
           new NodeProcessor() {
           public Object process(Node n, Stack<Node> s, NodeProcessorCtx c,
               Object... os) {
             return new Boolean(false);
           }
         });
+      opRules.put(new RuleRegExp("No user code in fil",
+              FilterOperator.getOperatorName() + "%"),
+          new NodeProcessor() {
+          public Object process(Node n, Stack<Node> s, NodeProcessorCtx c,
+              Object... os) {
+            ExprNodeDesc expr = ((FilterOperator)n).getConf().getPredicate();
+            return new Boolean(checkExpression(expr));
+          }
+        });
+      opRules.put(new RuleRegExp("No user code in gby",
+              GroupByOperator.getOperatorName() + "%"),
+          new NodeProcessor() {
+          public Object process(Node n, Stack<Node> s, NodeProcessorCtx c,
+              Object... os) {
+            List<AggregationDesc> aggs = ((GroupByOperator)n).getConf().getAggregators();
+            return new Boolean(checkAggregators(aggs));
+          }
+        });
+      opRules.put(new RuleRegExp("No user code in select",
+              SelectOperator.getOperatorName() + "%"),
+          new NodeProcessor() {
+          public Object process(Node n, Stack<Node> s, NodeProcessorCtx c,
+              Object... os) {
+            List<ExprNodeDesc> exprs = ((SelectOperator)n).getConf().getColList();
+            return new Boolean(checkExpressions(exprs));
+          }
+        });
+
       return opRules;
     }
 

Modified: hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q?rev=1659760&r1=1659759&r2=1659760&view=diff
==============================================================================
--- hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q (original)
+++ hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q Sat Feb 14 08:35:06 2015
@@ -49,3 +49,11 @@ EXPLAIN SELECT * from src_orc s1 join sr
 set hive.llap.execution.mode=all;
 
 EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value;
+
+set hive.llap.execution.mode=auto;
+
+CREATE TEMPORARY FUNCTION test_udf_get_java_string AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaString';
+
+EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(key as int) > 1;
+EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1;
+EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(test_udf_get_java_string(cast(key as string)) as int) > 1;

Modified: hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out?rev=1659760&r1=1659759&r2=1659760&view=diff
==============================================================================
--- hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out (original)
+++ hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out Sat Feb 14 08:35:06 2015
@@ -1011,3 +1011,182 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: CREATE TEMPORARY FUNCTION test_udf_get_java_string AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaString'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: test_udf_get_java_string
+POSTHOOK: query: CREATE TEMPORARY FUNCTION test_udf_get_java_string AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaString'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: test_udf_get_java_string
+PREHOOK: query: EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(key as int) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(key as int) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (UDFToInteger(key) > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (UDFToInteger(key) + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (UDFToInteger(key) > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (UDFToInteger(GenericUDFTestGetJavaString(key)) + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(test_udf_get_java_string(cast(key as string)) as int) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(test_udf_get_java_string(cast(key as string)) as int) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (UDFToInteger(GenericUDFTestGetJavaString(key)) > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (UDFToInteger(key) + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+



Mime
View raw message