carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From QiangCai <...@git.apache.org>
Subject [GitHub] carbondata pull request #1605: [CARBONDATA-1526] [PreAgg] Added support to c...
Date Thu, 07 Dec 2017 07:41:54 GMT
Github user QiangCai commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1605#discussion_r155440875
  
    --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/AggregateDataMapCompactor.scala
---
    @@ -0,0 +1,118 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.carbondata.spark.rdd
    +
    +import java.util.concurrent.ExecutorService
    +
    +import scala.collection.JavaConverters._
    +
    +import org.apache.spark.sql.{CarbonSession, SQLContext}
    +import org.apache.spark.sql.execution.command.CompactionModel
    +import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand
    +import org.apache.spark.sql.execution.command.preaaggregate.PreAggregateUtil
    +import org.apache.spark.sql.parser.CarbonSpark2SqlParser
    +
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.statusmanager.{SegmentStatus, SegmentStatusManager}
    +import org.apache.carbondata.core.util.path.CarbonStorePath
    +import org.apache.carbondata.processing.loading.model.CarbonLoadModel
    +import org.apache.carbondata.processing.merger.{CarbonDataMergerUtil, CompactionType}
    +
    +/**
    + * Used to perform compaction on Aggregate data map.
    + */
    +class AggregateDataMapCompactor(carbonLoadModel: CarbonLoadModel,
    +    compactionModel: CompactionModel,
    +    executor: ExecutorService,
    +    sqlContext: SQLContext,
    +    storeLocation: String)
    +  extends Compactable(carbonLoadModel, compactionModel, executor, sqlContext, storeLocation)
{
    +
    +  override def executeCompaction(): Unit = {
    +    val carbonTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
    +    val loadMetaDataDetails = identifySegmentsToBeMerged()
    +    val segments = loadMetaDataDetails.asScala.map(_.getLoadName)
    +    if (segments.nonEmpty) {
    +      val mergedLoadName = CarbonDataMergerUtil.getMergedLoadName(loadMetaDataDetails).split("_")(1)
    +      CarbonSession.threadSet(
    +        CarbonCommonConstants.CARBON_INPUT_SEGMENTS +
    +        carbonLoadModel.getDatabaseName + "." +
    +        carbonLoadModel.getTableName,
    +        segments.mkString(","))
    +      CarbonSession.threadSet(
    +        CarbonCommonConstants.VALIDATE_CARBON_INPUT_SEGMENTS +
    +        carbonLoadModel.getDatabaseName + "." +
    +        carbonLoadModel.getTableName, "false")
    +      val headers = carbonTable.getTableInfo.getFactTable.getListOfColumns.asScala
    +        .map(_.getColumnName).mkString(",")
    +      // Creating a new query string to insert data into pre-aggregate table from that same table.
    +      // For example: To compact preaggtable1 we can fire a query like insert into preaggtable1
    +      // select * from preaggtable1
    +      // The following code will generate the select query with a load UDF that will be used to
    +      // apply DataLoadingRules
    +      val childDataFrame = sqlContext.sparkSession.sql(new CarbonSpark2SqlParser()
    +        // adding the aggregation load UDF
    +        .addPreAggLoadFunction(PreAggregateUtil
    +        // creating the select query on the basis of the table schema
    +        .createChildSelectQuery(carbonTable.getTableInfo.getFactTable))).drop("preAggLoad")
    --- End diff --
    
    indentation is incorrect


---

Mime
View raw message