carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mohammadshahidkhan <...@git.apache.org>
Subject [GitHub] carbondata pull request #1583: [CARBONDATA-1822][Spark-Integration] Support ...
Date Fri, 01 Dec 2017 06:37:15 GMT
Github user mohammadshahidkhan commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1583#discussion_r154276519
  
    --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
---
    @@ -0,0 +1,264 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.execution.command.management
    +
    +import java.util
    +
    +import scala.collection.JavaConverters._
    +
    +import org.apache.spark.sql._
    +import org.apache.spark.sql.execution.command.MetadataCommand
    +import org.apache.spark.sql.util.CarbonException
    +
    +import org.apache.carbondata.common.logging.{LogService, LogServiceFactory}
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.datastore.impl.FileFactory
    +import org.apache.carbondata.core.locks.{ICarbonLock, LockUsage}
    +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier}
    +import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl
    +import org.apache.carbondata.core.metadata.schema.table.{DataMapSchema, TableInfo}
    +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema
    +import org.apache.carbondata.core.util.CarbonProperties
    +import org.apache.carbondata.core.util.path.CarbonStorePath
    +import org.apache.carbondata.events.{CreateTablePreExecutionEvent, OperationContext,
OperationListenerBus, RefreshTablePostExecutionEvent, RefreshTablePreExecutionEvent}
    +import org.apache.carbondata.spark.util.CommonUtil
    +
    +/**
    + * Command to register carbon table from existing carbon table data
    + */
    +case class RefreshCarbonTableCommand(
    +    dbName: Option[String],
    +    tableName: String)
    +  extends MetadataCommand {
    +  val LOGGER: LogService =
    +    LogServiceFactory.getLogService(this.getClass.getName)
    +
    +  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    +    val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore
    +    val databaseName = GetDB.getDatabaseName(dbName, sparkSession)
    +    val databaseLocation = GetDB.getDatabaseLocation(databaseName, sparkSession,
    +      CarbonProperties.getStorePath)
    +    // Steps
    +    // 1. get table path
    +    // 2. perform the below steps
    +    // 2.1 check if the table already register with hive then ignore and continue with
the next
    +    // schema
    +    // 2.2 register the table with the hive check if the table being registered has aggregate
table
    +    // then do the below steps
    +    // 2.2.1 validate that all the aggregate tables are copied at the store location.
    +    // 2.2.2 Register the aggregate tables
    +    val locksToBeAcquired = List(LockUsage.METADATA_LOCK, LockUsage.DROP_TABLE_LOCK)
    +    var locks = List.empty[ICarbonLock]
    +    try {
    +      val tablePath = databaseLocation + CarbonCommonConstants.FILE_SEPARATOR + tableName
    +      val absoluteTableIdentifier = AbsoluteTableIdentifier.from(tablePath, databaseName,
tableName)
    +      locks = CommonUtil.acquireLock(locksToBeAcquired, absoluteTableIdentifier)
    +      // 2.1 check if the table already register with hive then ignore and continue with
the next
    +      // schema
    +      if (!sparkSession.sessionState.catalog.listTables(databaseName)
    +        .exists(_.table.equalsIgnoreCase(tableName))) {
    +        val carbonTablePath = CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier)
    +        // check the existence of the schema file to know its a carbon table
    +        val schemaFilePath = carbonTablePath.getSchemaFilePath
    +        // if schema file does not exist then the table will either non carbon table
or stale
    +        // carbon table
    +        if (FileFactory.isFileExist(schemaFilePath, FileFactory.getFileType(schemaFilePath)))
{
    +          // read TableInfo
    +          val thriftTableInfo: org.apache.carbondata.format.TableInfo = metaStore
    +            .getThriftTableInfo(carbonTablePath)(sparkSession)
    +          val schemaConverter = new ThriftWrapperSchemaConverterImpl()
    +          val tableInfo = schemaConverter
    +            .fromExternalToWrapperTableInfo(thriftTableInfo,
    +              databaseName,
    +              tableName,
    +              absoluteTableIdentifier.getTablePath)
    +          // 2.2 register the table with the hive check if the table being registered
has
    +          // aggregate table then do the below steps
    +          // 2.2.1 validate that all the aggregate tables are copied at the store location.
    +          val dataMapSchemaList = tableInfo.getDataMapSchemaList
    +          if (null != dataMapSchemaList && dataMapSchemaList.size() != 0) {
    +            // validate all the aggregate tables are copied at the storeLocation
    +            val allExists = validateAllAggregateTablePresent(databaseName,
    +              dataMapSchemaList, databaseLocation)
    +            if (!allExists) {
    +              // fail the register operation
    +              val msg = s"Table registration with Database name [$databaseName] and Table
name " +
    +                        s"[$tableName] failed. All the aggregate Tables for table [$tableName]
is" +
    +                        s" not copied under database [$databaseName]"
    +              LOGGER.audit(msg)
    +              CarbonException.analysisException(msg)
    +            }
    +            // 2.2.1 Register the aggregate tables to hive
    +            registerAggregates(databaseName, dataMapSchemaList, databaseLocation)(sparkSession)
    +          }
    +          registerTableWithHive(databaseName, tableName, tableInfo)(sparkSession)
    +        } else {
    +          LOGGER.audit(
    +            s"Table registration with Database name [$databaseName] and Table name [$tableName]
" +
    +            s"failed." +
    +            s"Table [$tableName] either non carbon table or stale carbon table under
database " +
    +            s"[$databaseName]")
    +        }
    +      } else {
    +        LOGGER.audit(
    +          s"Table registration with Database name [$databaseName] and Table name [$tableName]
" +
    +          s"failed." +
    +          s"Table [$tableName] either already exists or registered under database [$dbName]")
    +      }
    +    }
    +    finally {
    +      // release lock after command execution completion
    +      CommonUtil.releaseLocks(locks)
    +    }
    +    // update the schema modified time
    +    metaStore.updateAndTouchSchemasUpdatedTime()
    +    Seq.empty
    +  }
    +
    +  /**
    +   * the method prepare the data type for raw column
    +   *
    +   * @param column
    +   * @return
    +   */
    +  def prepareDataType(column: ColumnSchema): String = {
    +    column.getDataType.getName.toLowerCase() match {
    +      case "decimal" =>
    +        "decimal(" + column.getPrecision + "," + column.getScale + ")"
    +      case others =>
    +        others
    +    }
    +  }
    +
    +  /**
    +   * The method register the carbon table with hive
    +   *
    +   * @param dbName
    +   * @param tableName
    +   * @param tableInfo
    +   * @param sparkSession
    +   * @return
    +   */
    +  def registerTableWithHive(dbName: String,
    +      tableName: String,
    +      tableInfo: TableInfo)(sparkSession: SparkSession): Any = {
    +    val carbonMetaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore
    +    val carbonSchemaString = carbonMetaStore.generateTableSchemaString(tableInfo,
    +      tableInfo.getOrCreateAbsoluteTableIdentifier())
    +    val tablePath = tableInfo.getTablePath
    +    try {
    +      val columns = tableInfo.getFactTable.getListOfColumns.asScala.filter(!_.isInvisible)
    +      // TODO adding the complex type fields should be handled
    +      // getAll Fields
    +      val fields = new Array[String](columns.size)
    +      columns.map(column => fields(column.getSchemaOrdinal)
    +        = column.getColumnName + ' ' + prepareDataType(column))
    +      val operationContext = new OperationContext
    +      val createTablePreExecutionEvent: RefreshTablePreExecutionEvent =
    +        new RefreshTablePreExecutionEvent(sparkSession,
    +          tableInfo.getOrCreateAbsoluteTableIdentifier().getCarbonTableIdentifier,
    +          tablePath)
    +      OperationListenerBus.getInstance.fireEvent(createTablePreExecutionEvent, operationContext)
    +      sparkSession.sql(
    --- End diff --
    
    @ravipesala Please elaborate how to use the same i don't see any instance.


---

Mime
View raw message