Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 00C2C200D4F for ; Wed, 6 Dec 2017 17:02:29 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id F3B9F160C08; Wed, 6 Dec 2017 16:02:28 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 454F6160BFD for ; Wed, 6 Dec 2017 17:02:28 +0100 (CET) Received: (qmail 46980 invoked by uid 500); 6 Dec 2017 16:02:27 -0000 Mailing-List: contact issues-help@carbondata.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@carbondata.apache.org Delivered-To: mailing list issues@carbondata.apache.org Received: (qmail 46971 invoked by uid 99); 6 Dec 2017 16:02:27 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 06 Dec 2017 16:02:27 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 49244E0219; Wed, 6 Dec 2017 16:02:27 +0000 (UTC) From: ravipesala To: issues@carbondata.apache.org Reply-To: issues@carbondata.apache.org References: In-Reply-To: Subject: [GitHub] carbondata pull request #1626: [CARBONDATA-1519][PreAgg-Timeseries] Support ... Content-Type: text/plain Message-Id: <20171206160227.49244E0219@git1-us-west.apache.org> Date: Wed, 6 Dec 2017 16:02:27 +0000 (UTC) archived-at: Wed, 06 Dec 2017 16:02:29 -0000 Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1626#discussion_r155279382 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateUtil.scala --- @@ -493,4 +492,102 @@ object PreAggregateUtil { updatedPlan } + /** + * Below method will be used to get the select query when rollup policy is + * applied in case of timeseries table + * @param tableSchema + * main data map schema + * @param selectedDataMapSchema + * selected data map schema for rollup + * @return select query based on rolloup + */ + def createTimeseriesSelectQueryForRollup( + tableSchema: TableSchema, + selectedDataMapSchema: AggregationDataMapSchema): String = { + val aggregateColumns = scala.collection.mutable.ArrayBuffer.empty[String] + val groupingExpressions = scala.collection.mutable.ArrayBuffer.empty[String] + tableSchema.getListOfColumns.asScala.foreach { + a => if (a.getAggFunction.nonEmpty) { + aggregateColumns += s"${a.getAggFunction match { + case "count" => "sum" + case others@_ => others}}(${selectedDataMapSchema.getAggChildColByParent( + a.getParentColumnTableRelations.get(0).getColumnName, a.getAggFunction).getColumnName})" + } else if (a.getTimeSeriesFunction.nonEmpty) { + groupingExpressions += s"timeseries(${ + selectedDataMapSchema + .getNonAggNonTimeChildColBasedByParent(a.getParentColumnTableRelations. + get(0).getColumnName).getColumnName + } , '${ a.getTimeSeriesFunction }')" + } else { + groupingExpressions += selectedDataMapSchema + .getNonAggNonTimeChildColBasedByParent(a.getParentColumnTableRelations. + get(0).getColumnName).getColumnName + } + } + s"select ${ groupingExpressions.mkString(",") },${ aggregateColumns.mkString(",") + } from ${selectedDataMapSchema.getChildSchema.getTableName } " + + s"group by ${ groupingExpressions.mkString(",") }" + } + + /** + * Below method will be used to creating select query for timeseries + * for lowest level for aggergation like second level, in that case it will + * hit the maintable + * @param tableSchema + * data map schema + * @param parentTableName + * parent schema + * @return select query for loading + */ + def createTimeSeriesSelectQueryFromMain(tableSchema: TableSchema, + parentTableName: String): String = { + val aggregateColumns = scala.collection.mutable.ArrayBuffer.empty[String] + val groupingExpressions = scala.collection.mutable.ArrayBuffer.empty[String] + tableSchema.getListOfColumns.asScala.foreach { + a => + if (a.getAggFunction.nonEmpty) { + aggregateColumns += + s"${ a.getAggFunction }(${ a.getParentColumnTableRelations.get(0).getColumnName })" + } else if (a.getTimeSeriesFunction.nonEmpty) { + groupingExpressions += + s"timeseries(${ a.getParentColumnTableRelations.get(0).getColumnName },'${ + a + .getTimeSeriesFunction + }')" --- End diff -- format properly ---