drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jinfengni <...@git.apache.org>
Subject [GitHub] drill pull request: Drill 4372 review
Date Wed, 02 Mar 2016 18:16:48 GMT
Github user jinfengni commented on a diff in the pull request:

    https://github.com/apache/drill/pull/397#discussion_r54765563
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/DrillFunctionRegistry.java
---
    @@ -92,38 +94,110 @@ public DrillFunctionRegistry(ScanResult classpathScan) {
       }
     
       public int size(){
    -    return methods.size();
    +    return registeredFunctions.size();
       }
     
       /** Returns functions with given name. Function name is case insensitive. */
       public List<DrillFuncHolder> getMethods(String name) {
    -    return this.methods.get(name.toLowerCase());
    +    return this.registeredFunctions.get(name.toLowerCase());
    +  }
    +
    +  public Collection<DrillFuncHolder> getAllMethods() {
    +    return Collections.unmodifiableCollection(registeredFunctions.values());
       }
     
       public void register(DrillOperatorTable operatorTable) {
    -    SqlOperator op;
    -    for (Entry<String, Collection<DrillFuncHolder>> function : methods.asMap().entrySet())
{
    -      Set<Integer> argCounts = Sets.newHashSet();
    -      String name = function.getKey().toUpperCase();
    +    for (Entry<String, Collection<DrillFuncHolder>> function : registeredFunctions.asMap().entrySet())
{
    +      final ArrayListMultimap<Pair<Integer, Integer>, DrillFuncHolder> functions
= ArrayListMultimap.create();
    +      final ArrayListMultimap<Integer, DrillFuncHolder> aggregateFunctions = ArrayListMultimap.create();
    +      final String name = function.getKey().toUpperCase();
    +      boolean isDeterministic = true;
           for (DrillFuncHolder func : function.getValue()) {
    -        if (argCounts.add(func.getParamCount())) {
    -          if (func.isAggregating()) {
    -            op = new DrillSqlAggOperator(name, func.getParamCount());
    -          } else {
    -            boolean isDeterministic;
    -            // prevent Drill from folding constant functions with types that cannot be
materialized
    -            // into literals
    -            if (DrillConstExecutor.NON_REDUCIBLE_TYPES.contains(func.getReturnType().getMinorType()))
{
    -              isDeterministic = false;
    -            } else {
    -              isDeterministic = func.isDeterministic();
    -            }
    -            op = new DrillSqlOperator(name, func.getParamCount(), func.getReturnType(),
isDeterministic);
    -          }
    -          operatorTable.add(function.getKey(), op);
    +        final int paramCount = func.getParamCount();
    +        if(func.isAggregating()) {
    +          aggregateFunctions.put(paramCount, func);
    +        } else {
    +          final Pair<Integer, Integer> argNumerRange = getArgNumerRange(name, func);
    +          functions.put(argNumerRange, func);
             }
    +
    +        if(!func.isDeterministic()) {
    +          isDeterministic = false;
    +        }
    +      }
    +      for (Entry<Pair<Integer, Integer>, Collection<DrillFuncHolder>>
entry : functions.asMap().entrySet()) {
    +        final DrillSqlOperator drillSqlOperator;
    +        final Pair<Integer, Integer> range = entry.getKey();
    +        final int max = range.getRight();
    +        final int min = range.getLeft();
    +        drillSqlOperator = new DrillSqlOperator(
    +            name,
    +            Lists.newArrayList(entry.getValue()),
    +            min,
    +            max,
    +            isDeterministic);
    +        operatorTable.add(name, drillSqlOperator);
    +      }
    +      for (Entry<Integer, Collection<DrillFuncHolder>> entry : aggregateFunctions.asMap().entrySet())
{
    +        operatorTable.add(name, new DrillSqlAggOperator(name, Lists.newArrayList(entry.getValue()),
entry.getKey()));
           }
         }
    +
    +    registerCalcitePlaceHolderFunction(operatorTable);
    +  }
    +
    +  /**
    +   * These {@link DrillSqlOperator} merely act as a placeholder so that Calcite
    +   * allows convert_to(), convert_from(), flatten(), date_part() functions in SQL.
    +   */
    +  private void registerCalcitePlaceHolderFunction(DrillOperatorTable operatorTable) {
    +    final String convert_to = "CONVERT_TO";
    +    final String convert_from = "CONVERT_FROM";
    +    final String flatten = "FLATTEN";
    +    final String date_part = "DATE_PART";
    +
    +    operatorTable.add(convert_to,
    +        new DrillSqlOperator(convert_to,
    +            2,
    +            true));
    +    operatorTable.add(convert_from,
    +        new DrillSqlOperator(convert_from,
    +            2,
    +            true));
    +    operatorTable.add(flatten,
    +        new DrillSqlOperator(flatten,
    +            1,
    +            true));
    +    operatorTable.add(date_part,
    +        new DrillSqlOperator(date_part,
    +            2,
    +            true));
       }
     
    +  private Pair<Integer, Integer> getArgNumerRange(final String name, final DrillFuncHolder
func) {
    +    switch(name.toUpperCase()) {
    +      case "CONCAT":
    +        return Pair.of(1, Integer.MAX_VALUE);
    +
    +      // Drill does not have a FunctionTemplate for the lpad/rpad with two arguments.
    +      // It relies on DrillOptiq.java to add a third dummy argument to be acceptable
    +      // by the FunctionTemplate in StringFunctions.java
    +      case "LPAD":
    +      case "RPAD":
    +        return Pair.of(2, 3);
    +
    +      // Similar to the reason above, DrillOptiq.java is used for rewritting
    +      case "LTRIM":
    +      case "RTRIM":
    +      case "BTRIM":
    +        return Pair.of(1, 2);
    +
    +      // Similar to the reason above, DrillOptiq.java is used for rewritting
    +      case "LENGTH":
    +        return Pair.of(1, 2);
    +
    +      default:
    +        return Pair.of(func.getParamCount(), func.getParamCount());
    +    }
    +  }
    --- End diff --
    
    @jacques-n, here is my understanding of the dummy functions @hsuanyi is talking about.
    
    For the 'lpad' function, Calcite allows either 2 or 3 arguments. Drill only has an implementation
for the 3-argument case. What Drill does is have DrillOptiq rewrite the 2-argument call
into a 3-argument call [2]. The dummy function is for the 2-argument case: Drill's run-time
code generation logic does not need a 2-argument implementation at all, because of the rewrite
in DrillOptiq. 
    
    Another option is to 1) remove the rewrite in DrillOptiq and 2) add an implementation
for the 2-argument case (which is essentially the same as the one for 3 arguments).  That new
implementation for the 2-argument case would not be a dummy at all. 
    
      
    
    [1] https://github.com/apache/drill/blob/master/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java#L702-L707
    [2] https://github.com/apache/drill/blob/master/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillOptiq.java#L429-L434


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message