drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amansinha100 <...@git.apache.org>
Subject [GitHub] drill pull request #637: Drill 1950 : Parquet row group filter pushdown.
Date Fri, 04 Nov 2016 15:14:51 GMT
Github user amansinha100 commented on a diff in the pull request:

    https://github.com/apache/drill/pull/637#discussion_r86566263
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
---
    @@ -0,0 +1,282 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p/>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p/>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.expr.stat;
    +
    +import com.google.common.base.Preconditions;
    +import org.apache.drill.common.exceptions.DrillRuntimeException;
    +import org.apache.drill.common.expression.FunctionHolderExpression;
    +import org.apache.drill.common.expression.LogicalExpression;
    +import org.apache.drill.common.expression.SchemaPath;
    +import org.apache.drill.common.expression.ValueExpressions;
    +import org.apache.drill.common.expression.fn.CastFunctions;
    +import org.apache.drill.common.expression.fn.FuncHolder;
    +import org.apache.drill.common.expression.visitors.AbstractExprVisitor;
    +import org.apache.drill.common.types.TypeProtos;
    +import org.apache.drill.common.types.Types;
    +import org.apache.drill.exec.expr.DrillSimpleFunc;
    +import org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder;
    +import org.apache.drill.exec.expr.fn.interpreter.InterpreterEvaluator;
    +import org.apache.drill.exec.expr.holders.BigIntHolder;
    +import org.apache.drill.exec.expr.holders.Float4Holder;
    +import org.apache.drill.exec.expr.holders.Float8Holder;
    +import org.apache.drill.exec.expr.holders.IntHolder;
    +import org.apache.drill.exec.expr.holders.ValueHolder;
    +import org.apache.drill.exec.store.parquet.stat.ColumnStatistics;
    +import org.apache.drill.exec.vector.ValueHolderHelper;
    +import org.apache.parquet.column.statistics.DoubleStatistics;
    +import org.apache.parquet.column.statistics.FloatStatistics;
    +import org.apache.parquet.column.statistics.IntStatistics;
    +import org.apache.parquet.column.statistics.LongStatistics;
    +import org.apache.parquet.column.statistics.Statistics;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import java.util.HashMap;
    +import java.util.HashSet;
    +import java.util.Map;
    +import java.util.Set;
    +
    +public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, RuntimeException>
{
    +  static final Logger logger = LoggerFactory.getLogger(RangeExprEvaluator.class); // SLF4J logger for this class
    +
    +  private final Map<SchemaPath, ColumnStatistics> columnStatMap; // statistics looked up by field path in visitUnknown
    +  private final long rowCount; // used as the null count when a field has no entry in columnStatMap
    +
    +  /**
    +   * Creates an evaluator backed by the given per-column statistics.
    +   *
    +   * @param columnStatMap statistics keyed by column path
    +   * @param rowCount row count; reported as the null count for fields missing from the map
    +   */
    +  public RangeExprEvaluator(final Map<SchemaPath, ColumnStatistics> columnStatMap, long rowCount) {
    +    this.rowCount = rowCount;
    +    this.columnStatMap = columnStatMap;
    +  }
    +
    +  /** @return the row count supplied at construction time */
    +  public long getRowCount() {
    +    return rowCount;
    +  }
    +
    +  /**
    +   * Resolves statistics for expressions that have no dedicated visit method.
    +   * Only {@link TypedFieldExpr} is handled; any other expression yields {@code null}.
    +   *
    +   * @param e the expression being visited
    +   * @param value unused
    +   * @return the column's statistics, an all-null int statistics object when the field
    +   *         has no entry in the column map, or {@code null} for non-field expressions
    +   */
    +  @Override
    +  public Statistics visitUnknown(LogicalExpression e, Void value) throws RuntimeException {
    +    if (!(e instanceof TypedFieldExpr)) {
    +      return null;
    +    }
    +
    +    final TypedFieldExpr typedField = (TypedFieldExpr) e;
    +    final ColumnStatistics knownStats = columnStatMap.get(typedField.getPath());
    +    if (knownStats != null) {
    +      return knownStats.getStatistics();
    +    }
    +
    +    // field does not exist.
    +    Preconditions.checkArgument(typedField.getMajorType().equals(Types.OPTIONAL_INT));
    +    final IntStatistics allNulls = new IntStatistics();
    +    allNulls.setNumNulls(rowCount); // all values are nulls
    +    return allNulls;
    +  }
    +
    +  /** Wraps an int literal as statistics whose min and max are both the literal. */
    +  @Override
    +  public Statistics visitIntConstant(ValueExpressions.IntExpression expr, Void value) throws RuntimeException {
    +    final int literal = expr.getInt();
    +    return getStatistics(literal);
    +  }
    +
    +  /** Wraps a long literal as statistics whose min and max are both the literal. */
    +  @Override
    +  public Statistics visitLongConstant(ValueExpressions.LongExpression expr, Void value) throws RuntimeException {
    +    final long literal = expr.getLong();
    +    return getStatistics(literal);
    +  }
    +
    +  /** Wraps a float literal as statistics whose min and max are both the literal. */
    +  @Override
    +  public Statistics visitFloatConstant(ValueExpressions.FloatExpression expr, Void value) throws RuntimeException {
    +    final float literal = expr.getFloat();
    +    return getStatistics(literal);
    +  }
    +
    +  /** Wraps a double literal as statistics whose min and max are both the literal. */
    +  @Override
    +  public Statistics visitDoubleConstant(ValueExpressions.DoubleExpression expr, Void value) throws RuntimeException {
    +    final double literal = expr.getDouble();
    +    return getStatistics(literal);
    +  }
    +
    +  /** Wraps a date literal (the value returned by {@code getDate()}) as single-point long statistics. */
    +  @Override
    +  public Statistics visitDateConstant(ValueExpressions.DateExpression expr, Void value) throws RuntimeException {
    +    return getStatistics(expr.getDate());
    +  }
    +
    +  /** Wraps a timestamp literal (the value returned by {@code getTimeStamp()}) as single-point long statistics. */
    +  @Override
    +  public Statistics visitTimeStampConstant(ValueExpressions.TimeStampExpression tsExpr, Void value) throws RuntimeException {
    +    return getStatistics(tsExpr.getTimeStamp());
    +  }
    +
    +  /** Wraps a time literal (the value returned by {@code getTime()}) as single-point int statistics. */
    +  @Override
    +  public Statistics visitTimeConstant(ValueExpressions.TimeExpression timeExpr, Void value) throws RuntimeException {
    +    return getStatistics(timeExpr.getTime());
    +  }
    +
    +  /**
    +   * Evaluates statistics for a function call. Only cast functions held by a
    +   * {@link DrillSimpleFuncHolder} are supported; everything else yields {@code null}.
    +   *
    +   * @param holderExpr the function call expression
    +   * @param value unused
    +   * @return statistics of the cast result, or {@code null} when the function is not a
    +   *         supported cast or its argument has no usable (non-empty) statistics
    +   */
    +  @Override
    +  public Statistics visitFunctionHolderExpression(FunctionHolderExpression holderExpr, Void value) throws RuntimeException {
    +    final FuncHolder holder = holderExpr.getHolder();
    +    // Only Drill function is allowed.
    +    if (!(holder instanceof DrillSimpleFuncHolder)) {
    +      return null;
    +    }
    +
    +    final String registeredName = ((DrillSimpleFuncHolder) holder).getRegisteredNames()[0];
    +    if (!CastFunctions.isCastFunction(registeredName)) {
    +      return null;
    +    }
    +
    +    final Statistics argStats = holderExpr.args.get(0).accept(this, null);
    +    if (argStats == null || argStats.isEmpty()) {
    +      return null;
    +    }
    +    return evalCastFunc(holderExpr, argStats);
    +  }
    +
    +  private IntStatistics getStatistics(int value) {
    +    return getStatistics(value, value);
    +  }
    +
    +  /** Builds int statistics covering the range {@code min} to {@code max}. */
    +  private IntStatistics getStatistics(int min, int max) {
    +    final IntStatistics range = new IntStatistics();
    +    range.setMinMax(min, max);
    +    return range;
    +  }
    +
    +  private LongStatistics getStatistics(long value) {
    +    return getStatistics(value, value);
    +  }
    +
    +  /** Builds long statistics covering the range {@code min} to {@code max}. */
    +  private LongStatistics getStatistics(long min, long max) {
    +    final LongStatistics range = new LongStatistics();
    +    range.setMinMax(min, max);
    +    return range;
    +  }
    +
    +  private DoubleStatistics getStatistics(double value) {
    +    return getStatistics(value, value);
    +  }
    +
    +  /** Builds double statistics covering the range {@code min} to {@code max}. */
    +  private DoubleStatistics getStatistics(double min, double max) {
    +    final DoubleStatistics range = new DoubleStatistics();
    +    range.setMinMax(min, max);
    +    return range;
    +  }
    +
    +  private FloatStatistics getStatistics(float value) {
    +    return getStatistics(value, value);
    +  }
    +
    +  /** Builds float statistics covering the range {@code min} to {@code max}. */
    +  private FloatStatistics getStatistics(float min, float max) {
    +    final FloatStatistics range = new FloatStatistics();
    +    range.setMinMax(min, max);
    +    return range;
    +  }
    +
    +//  private int convertDrillDateValue(long dateInMillis) {
    +//    // Specific to date columns created by Drill CTAS prior to the fix for DRILL-4203.
    +//    // Apply the same shift as in ParquetOutputRecordWriter.java for data value.
    +//    final int intValue = (int) (DateTimeUtils.toJulianDayNumber(dateInMillis) + JULIAN_DAY_EPOC);
    +//    return intValue;
    +//  }
    +
    +  /**
    +   * Applies a cast function to the min and max of {@code input} and returns statistics
    +   * built from the two cast results.
    +   *
    +   * <p>NOTE(review): evaluating only the two endpoints presumes the cast preserves
    +   * ordering; confirm for every pair registered in {@code CAST_FUNC}.
    +   *
    +   * @param holderExpr the cast function call; its first argument supplies the source type
    +   * @param input statistics of the cast's argument
    +   * @return {@code input} itself when source and destination types are equal; new min/max
    +   *         statistics of the destination type otherwise; or {@code null} when the
    +   *         (source, destination) pair is not listed in {@code CAST_FUNC} or either type is
    +   *         not one of INT, BIGINT, FLOAT4, FLOAT8
    +   * @throws DrillRuntimeException if evaluation fails; the original exception is attached
    +   *         as the cause
    +   */
    +  private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) {
    +    try {
    +      DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder();
    +
    +      DrillSimpleFunc interpreter = funcHolder.createInterpreter();
    +
    +      final ValueHolder minHolder, maxHolder;
    +
    +      TypeProtos.MinorType srcType = holderExpr.args.get(0).getMajorType().getMinorType();
    +      TypeProtos.MinorType destType = holderExpr.getMajorType().getMinorType();
    +
    +      if (srcType.equals(destType)) {
    +        // same type cast ==> NoOp.
    +        return input;
    +      } else if (!CAST_FUNC.containsKey(srcType) || !CAST_FUNC.get(srcType).contains(destType)) {
    +        return null; // cast func between srcType and destType is NOT allowed.
    +      }
    +
    +      // Wrap the input's min and max in value holders matching the source type.
    +      switch (srcType) {
    +      case INT :
    +        minHolder = ValueHolderHelper.getIntHolder(((IntStatistics)input).getMin());
    +        maxHolder = ValueHolderHelper.getIntHolder(((IntStatistics)input).getMax());
    +        break;
    +      case BIGINT:
    +        minHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics)input).getMin());
    +        maxHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics)input).getMax());
    +        break;
    +      case FLOAT4:
    +        minHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics)input).getMin());
    +        maxHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics)input).getMax());
    +        break;
    +      case FLOAT8:
    +        minHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMin());
    +        maxHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMax());
    +        break;
    +      default:
    +        return null;
    +      }
    +
    +      final ValueHolder[] args1 = {minHolder};
    +      final ValueHolder[] args2 = {maxHolder};
    +
    +      // Run the cast once on the min and once on the max.
    +      final ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
    +      final ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
    +
    +      switch (destType) {
    +      //TODO : need handle # of nulls.
    +      case INT:
    +        return getStatistics( ((IntHolder)minFuncHolder).value, ((IntHolder)maxFuncHolder).value);
    +      case BIGINT:
    +        return getStatistics( ((BigIntHolder)minFuncHolder).value, ((BigIntHolder)maxFuncHolder).value);
    +      case FLOAT4:
    +        return getStatistics( ((Float4Holder)minFuncHolder).value, ((Float4Holder)maxFuncHolder).value);
    +      case FLOAT8:
    +        return getStatistics( ((Float8Holder)minFuncHolder).value, ((Float8Holder)maxFuncHolder).value);
    +      default:
    +        return null;
    +      }
    +    } catch (Exception e) {
    +      // Attach the original exception as the cause so the root failure and its stack trace are not lost.
    +      throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName(), e);
    +    }
    +  }
    +
    +  static Map<TypeProtos.MinorType, Set<TypeProtos.MinorType>> CAST_FUNC =
new HashMap<>(); // source type -> destination types evalCastFunc accepts; NOTE(review): populated outside this excerpt; mutable static state, confirm thread-safety
    --- End diff --
    
    Right, the date/timestamp cast would be quite useful for row-group pruning, but it's OK
to restrict it for now.  Will we document the types of casts that will be supported for such
pruning?
    
    Also, I just noticed that this is populating a static HashMap.  This won't be thread-safe.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message