Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id AD12B200CDA for ; Fri, 4 Aug 2017 20:01:50 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id AB5B916DFFF; Fri, 4 Aug 2017 18:01:50 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id C832B16DFFE for ; Fri, 4 Aug 2017 20:01:49 +0200 (CEST) Received: (qmail 64976 invoked by uid 500); 4 Aug 2017 18:01:48 -0000 Mailing-List: contact dev-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@drill.apache.org Delivered-To: mailing list dev@drill.apache.org Received: (qmail 64965 invoked by uid 99); 4 Aug 2017 18:01:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 04 Aug 2017 18:01:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DC84FF334F; Fri, 4 Aug 2017 18:01:47 +0000 (UTC) From: jinfengni To: dev@drill.apache.org Reply-To: dev@drill.apache.org References: In-Reply-To: Subject: [GitHub] drill pull request #882: DRILL-4735: ConvertCountToDirectScan rule enhanceme... 
Content-Type: text/plain Message-Id: <20170804180147.DC84FF334F@git1-us-west.apache.org> Date: Fri, 4 Aug 2017 18:01:47 +0000 (UTC) archived-at: Fri, 04 Aug 2017 18:01:50 -0000 Github user jinfengni commented on a diff in the pull request: https://github.com/apache/drill/pull/882#discussion_r131445381 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ConvertCountToDirectScan.java --- @@ -85,109 +91,231 @@ protected ConvertCountToDirectScan(RelOptRuleOperand rule, String id) { @Override public void onMatch(RelOptRuleCall call) { final DrillAggregateRel agg = (DrillAggregateRel) call.rel(0); - final DrillScanRel scan = (DrillScanRel) call.rel(call.rels.length -1); - final DrillProjectRel proj = call.rels.length == 3 ? (DrillProjectRel) call.rel(1) : null; + final DrillScanRel scan = (DrillScanRel) call.rel(call.rels.length - 1); + final DrillProjectRel project = call.rels.length == 3 ? (DrillProjectRel) call.rel(1) : null; final GroupScan oldGrpScan = scan.getGroupScan(); final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner()); - // Only apply the rule when : + // Only apply the rule when: // 1) scan knows the exact row count in getSize() call, // 2) No GroupBY key, - // 3) only one agg function (Check if it's count(*) below). - // 4) No distinct agg call. + // 3) No distinct agg call. if (!(oldGrpScan.getScanStats(settings).getGroupScanProperty().hasExactRowCount() && agg.getGroupCount() == 0 - && agg.getAggCallList().size() == 1 && !agg.containsDistinctCall())) { return; } - AggregateCall aggCall = agg.getAggCallList().get(0); - - if (aggCall.getAggregation().getName().equals("COUNT") ) { - - long cnt = 0; - // count(*) == > empty arg ==> rowCount - // count(Not-null-input) ==> rowCount - if (aggCall.getArgList().isEmpty() || - (aggCall.getArgList().size() == 1 && - ! 
agg.getInput().getRowType().getFieldList().get(aggCall.getArgList().get(0).intValue()).getType().isNullable())) { - cnt = (long) oldGrpScan.getScanStats(settings).getRecordCount(); - } else if (aggCall.getArgList().size() == 1) { - // count(columnName) ==> Agg ( Scan )) ==> columnValueCount - int index = aggCall.getArgList().get(0); - - if (proj != null) { - // project in the middle of Agg and Scan : Only when input of AggCall is a RexInputRef in Project, we find the index of Scan's field. - // For instance, - // Agg - count($0) - // \ - // Proj - Exp={$1} - // \ - // Scan (col1, col2). - // return count of "col2" in Scan's metadata, if found. - - if (proj.getProjects().get(index) instanceof RexInputRef) { - index = ((RexInputRef) proj.getProjects().get(index)).getIndex(); - } else { - return; // do not apply for all other cases. - } - } + final CountsCollector countsCollector = new CountsCollector(settings); + // if counts were not collected, rule won't be applied + if (!countsCollector.collect(agg, scan, project)) { + return; + } - String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(); + final RelDataType scanRowType = constructDataType(agg); - cnt = oldGrpScan.getColumnValueCount(SchemaPath.getSimplePath(columnName)); - if (cnt == GroupScan.NO_COLUMN_STATS) { - // if column stats are not available don't apply this rule - return; - } - } else { - return; // do nothing. 
- } + final DynamicPojoRecordReader reader = new DynamicPojoRecordReader<>( + buildSchema(scanRowType.getFieldNames()), + Collections.singletonList(countsCollector.getCounts())); - RelDataType scanRowType = getCountDirectScanRowType(agg.getCluster().getTypeFactory()); + final ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, scanRowType.getFieldCount()); + final GroupScan directScan = new MetadataDirectGroupScan(reader, oldGrpScan.getFiles(), scanStats); - final ScanPrel newScan = ScanPrel.create(scan, - scan.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), getCountDirectScan(cnt), - scanRowType); + final ScanPrel newScan = ScanPrel.create(scan, + scan.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), directScan, + scanRowType); - List exprs = Lists.newArrayList(); - exprs.add(RexInputRef.of(0, scanRowType)); + final ProjectPrel newProject = new ProjectPrel(agg.getCluster(), agg.getTraitSet().plus(Prel.DRILL_PHYSICAL) + .plus(DrillDistributionTrait.SINGLETON), newScan, prepareFieldExpressions(scanRowType), agg.getRowType()); - final ProjectPrel newProj = new ProjectPrel(agg.getCluster(), agg.getTraitSet().plus(Prel.DRILL_PHYSICAL) - .plus(DrillDistributionTrait.SINGLETON), newScan, exprs, agg.getRowType()); + call.transformTo(newProject); + } - call.transformTo(newProj); + /** + * For each aggregate call creates field with "count$" prefix and bigint type. + * Constructs record type for created fields. 
+ * + * @param aggregateRel aggregate relation expression + * @return record type + */ + private RelDataType constructDataType(DrillAggregateRel aggregateRel) { + List fields = new ArrayList<>(); + for (int i = 0; i < aggregateRel.getAggCallList().size(); i++) { + RelDataTypeField field = new RelDataTypeFieldImpl("count$" + i, i, aggregateRel.getCluster().getTypeFactory().createSqlType(SqlTypeName.BIGINT)); --- End diff -- The field names in RelDataTypeField, "count$0" and "count$1", are not very informative. I think we could either get the field name from the aggregate operator, or use "count$colname" to indicate which column's count it represents. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. ---