carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jackylk <...@git.apache.org>
Subject [GitHub] incubator-carbondata pull request #448: [CARBONDATA-547] Added CarbonSession...
Date Mon, 26 Dec 2016 09:12:21 GMT
Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/incubator-carbondata/pull/448#discussion_r93853272
  
    --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonCommonSqlParser.scala
---
    @@ -0,0 +1,970 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.catalyst
    +
    +import java.util.regex.{Matcher, Pattern}
    +
    +import scala.collection.JavaConverters._
    +import scala.collection.mutable.{LinkedHashSet, Map}
    +import scala.language.implicitConversions
    +import scala.util.matching.Regex
    +
    +import org.apache.hadoop.hive.ql.lib.Node
    +import org.apache.hadoop.hive.ql.parse._
    +import org.apache.spark.sql.catalyst.trees.CurrentOrigin
    +import org.apache.spark.sql.execution.command._
    +
    +import org.apache.carbondata.common.logging.LogServiceFactory
    +import org.apache.carbondata.core.carbon.metadata.datatype.DataType
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.util.DataTypeUtil
    +import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
    +import org.apache.carbondata.spark.util.CommonUtil
    +
    +/**
    + * TODO remove the duplicate code and add the common methods to common class.
    + * Parser for All Carbon DDL, DML cases in Unified context
    + */
    +abstract class CarbonCommonSqlParser extends AbstractCarbonSparkSQLParser {
    +
    +  val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
    +  protected val AGGREGATE = carbonKeyWord("AGGREGATE")
    +  protected val AS = carbonKeyWord("AS")
    +  protected val AGGREGATION = carbonKeyWord("AGGREGATION")
    +  protected val ALL = carbonKeyWord("ALL")
    +  protected val HIGH_CARDINALITY_DIMS = carbonKeyWord("NO_DICTIONARY") // token: NO_DICTIONARY
    +  protected val BEFORE = carbonKeyWord("BEFORE")
    +  protected val BY = carbonKeyWord("BY")
    +  protected val CARDINALITY = carbonKeyWord("CARDINALITY")
    +  protected val CASCADE = carbonKeyWord("CASCADE")
    +  protected val CLASS = carbonKeyWord("CLASS")
    +  protected val CLEAN = carbonKeyWord("CLEAN")
    +  protected val COLS = carbonKeyWord("COLS")
    +  protected val COLUMNS = carbonKeyWord("COLUMNS")
    +  protected val COMPACT = carbonKeyWord("COMPACT")
    +  protected val CREATE = carbonKeyWord("CREATE")
    +  protected val CUBE = carbonKeyWord("CUBE")
    +  protected val CUBES = carbonKeyWord("CUBES")
    +  protected val DATA = carbonKeyWord("DATA")
    +  protected val DATABASE = carbonKeyWord("DATABASE")
    +  protected val DATABASES = carbonKeyWord("DATABASES")
    +  protected val DELETE = carbonKeyWord("DELETE")
    +  protected val DELIMITER = carbonKeyWord("DELIMITER")
    +  protected val DESCRIBE = carbonKeyWord("DESCRIBE")
    +  protected val DESC = carbonKeyWord("DESC")
    +  protected val DETAIL = carbonKeyWord("DETAIL")
    +  protected val DIMENSIONS = carbonKeyWord("DIMENSIONS")
    +  protected val DIMFOLDERPATH = carbonKeyWord("DIMFOLDERPATH")
    +  protected val DROP = carbonKeyWord("DROP")
    +  protected val ESCAPECHAR = carbonKeyWord("ESCAPECHAR")
    +  protected val EXCLUDE = carbonKeyWord("EXCLUDE")
    +  protected val EXPLAIN = carbonKeyWord("EXPLAIN")
    +  protected val EXTENDED = carbonKeyWord("EXTENDED")
    +  protected val FORMATTED = carbonKeyWord("FORMATTED")
    +  protected val FACT = carbonKeyWord("FACT")
    +  protected val FIELDS = carbonKeyWord("FIELDS")
    +  protected val FILEHEADER = carbonKeyWord("FILEHEADER")
    +  protected val SERIALIZATION_NULL_FORMAT = carbonKeyWord("SERIALIZATION_NULL_FORMAT")
    +  protected val BAD_RECORDS_LOGGER_ENABLE = carbonKeyWord("BAD_RECORDS_LOGGER_ENABLE")
    +  protected val BAD_RECORDS_ACTION = carbonKeyWord("BAD_RECORDS_ACTION")
    +  protected val FILES = carbonKeyWord("FILES")
    +  protected val FROM = carbonKeyWord("FROM")
    +  protected val HIERARCHIES = carbonKeyWord("HIERARCHIES")
    +  protected val IN = carbonKeyWord("IN")
    +  protected val INCLUDE = carbonKeyWord("INCLUDE")
    +  protected val INPATH = carbonKeyWord("INPATH")
    +  protected val INTO = carbonKeyWord("INTO")
    +  protected val LEVELS = carbonKeyWord("LEVELS")
    +  protected val LIKE = carbonKeyWord("LIKE")
    +  protected val LOAD = carbonKeyWord("LOAD")
    +  protected val LOCAL = carbonKeyWord("LOCAL")
    +  protected val MAPPED = carbonKeyWord("MAPPED")
    +  protected val MEASURES = carbonKeyWord("MEASURES")
    +  protected val MULTILINE = carbonKeyWord("MULTILINE")
    +  protected val COMPLEX_DELIMITER_LEVEL_1 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_1")
    +  protected val COMPLEX_DELIMITER_LEVEL_2 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_2")
    +  protected val OPTIONS = carbonKeyWord("OPTIONS")
    +  protected val OUTPATH = carbonKeyWord("OUTPATH")
    +  protected val OVERWRITE = carbonKeyWord("OVERWRITE")
    +  protected val PARTITION_COUNT = carbonKeyWord("PARTITION_COUNT")
    +  protected val PARTITIONDATA = carbonKeyWord("PARTITIONDATA")
    +  protected val PARTITIONER = carbonKeyWord("PARTITIONER")
    +  protected val QUOTECHAR = carbonKeyWord("QUOTECHAR")
    +  protected val RELATION = carbonKeyWord("RELATION")
    +  protected val SCHEMA = carbonKeyWord("SCHEMA")
    +  protected val SCHEMAS = carbonKeyWord("SCHEMAS")
    +  protected val SHOW = carbonKeyWord("SHOW")
    +  protected val TABLES = carbonKeyWord("TABLES")
    +  protected val TABLE = carbonKeyWord("TABLE")
    +  protected val TERMINATED = carbonKeyWord("TERMINATED")
    +  protected val TYPE = carbonKeyWord("TYPE")
    +  protected val USE = carbonKeyWord("USE")
    +  protected val WHERE = carbonKeyWord("WHERE")
    +  protected val WITH = carbonKeyWord("WITH")
    +  protected val AGGREGATETABLE = carbonKeyWord("AGGREGATETABLE")
    +  protected val ABS = carbonKeyWord("abs") // lower-case literal; carbonKeyWord prepends (?i)
    +
    +  protected val FOR = carbonKeyWord("FOR")
    +  protected val SCRIPTS = carbonKeyWord("SCRIPTS")
    +  protected val USING = carbonKeyWord("USING")
    +  protected val LIMIT = carbonKeyWord("LIMIT")
    +  protected val DEFAULTS = carbonKeyWord("DEFAULTS")
    +  protected val ALTER = carbonKeyWord("ALTER")
    +  protected val ADD = carbonKeyWord("ADD")
    +
    +  protected val IF = carbonKeyWord("IF")
    +  protected val NOT = carbonKeyWord("NOT")
    +  protected val EXISTS = carbonKeyWord("EXISTS")
    +  protected val DIMENSION = carbonKeyWord("DIMENSION")
    +  protected val STARTTIME = carbonKeyWord("STARTTIME")
    +  protected val SEGMENTS = carbonKeyWord("SEGMENTS")
    +  protected val SEGMENT = carbonKeyWord("SEGMENT")
    +
    +  protected val STRING = carbonKeyWord("STRING")
    +  protected val INTEGER = carbonKeyWord("INTEGER")
    +  protected val TIMESTAMP = carbonKeyWord("TIMESTAMP")
    +  protected val DATE = carbonKeyWord("DATE")
    +  protected val CHAR = carbonKeyWord("CHAR")
    +  protected val NUMERIC = carbonKeyWord("NUMERIC")
    +  protected val DECIMAL = carbonKeyWord("DECIMAL")
    +  protected val DOUBLE = carbonKeyWord("DOUBLE")
    +  protected val SHORT = carbonKeyWord("SMALLINT") // token is SMALLINT, not SHORT
    +  protected val INT = carbonKeyWord("INT")
    +  protected val BIGINT = carbonKeyWord("BIGINT")
    +  protected val ARRAY = carbonKeyWord("ARRAY")
    +  protected val STRUCT = carbonKeyWord("STRUCT")
    +
    +  protected val doubleQuotedString = "\"([^\"]+)\"".r // double-quoted, non-empty body in group 1
    +  protected val singleQuotedString = "'([^']+)'".r // single-quoted, non-empty body in group 1
    +
    +  // Reflectively invokes every public method of this instance whose return type is
    +  // Keyword and keeps the keyword's `str`.
    +  protected val newReservedWords =
    +    for {
    +      member <- this.getClass.getMethods
    +      if member.getReturnType == classOf[Keyword]
    +    } yield member.invoke(this).asInstanceOf[Keyword].str
    +
    +  // Lexer of this parser: a SqlLexical initialized with newReservedWords.
    +  override val lexical = {
    +    val lexer = new SqlLexical()
    +    lexer.initialize(newReservedWords)
    +    lexer
    +  }
    +
    +  import lexical.Identifier
    +
    +  /**
    +   * Lifts a regex into a parser that accepts an Identifier token whose text the
    +   * regex matches (as decided by `regex.unapplySeq`) and yields that text.
    +   */
    +  implicit def regexToParser(regex: Regex): Parser[String] = {
    +    val expected = s"identifier matching regex ${ regex }"
    +    acceptMatch(expected, {
    +      case Identifier(text) if regex.unapplySeq(text).nonEmpty => text
    +    })
    +  }
    +
    +  /**
    +   * Builds a case-insensitive regex for a keyword by prefixing it with the
    +   * inline flag `(?i)`.
    +   *
    +   * @param keys keyword text, used verbatim as the regex body
    +   * @return the compiled regex
    +   */
    +  private def carbonKeyWord(keys: String) = s"(?i)$keys".r
    +
    +  protected val escapedIdentifier = "`([^`]+)`".r // back-tick quoted, non-empty body in group 1
    +
    +  // Stable reordering: dimensions whose data type is "Array" or "Struct" are moved
    +  // behind all other dimensions; relative order inside each group is kept.
    +  private def reorderDimensions(dims: Seq[Field]): Seq[Field] = {
    +    val (complexDimensions, plainDimensions) = dims.partition { dimension =>
    +      dimension.dataType.getOrElse("NIL") match {
    +        case "Array" | "Struct" => true
    +        case _ => false
    +      }
    +    }
    +    plainDimensions ++ complexDimensions
    +  }
    +
    +
    +
    +  /**
    +   * Parses the two comma-separated integers of a `decimal(a,b)` type string.
    +   *
    +   * @param dataType type string that must start with lower-case `decimal(...)`
    +   * @return the first and the second integer inside the parentheses, in that order
    +   * @throws MalformedCarbonCommandException if the string does not start with
    +   *                                         `decimal(...)` or holds fewer than two values
    +   * @throws NumberFormatException           if one of the two values is not an integer
    +   */
    +  def getScaleAndPrecision(dataType: String): (Int, Int) = {
    +    val m: Matcher = Pattern.compile("^decimal\\(([^)]+)\\)").matcher(dataType)
    +    // The result of find() used to be ignored, so a non-matching string only failed
    +    // later in group(1) with an unhelpful IllegalStateException.
    +    if (!m.find()) {
    +      throw new MalformedCarbonCommandException("Invalid decimal data type: " + dataType)
    +    }
    +    m.group(1).split(",").map(_.trim) match {
    +      // extra values after the second one are ignored, as before
    +      case Array(first, second, _*) => (Integer.parseInt(first), Integer.parseInt(second))
    +      case _ =>
    +        throw new MalformedCarbonCommandException("Invalid decimal data type: " + dataType)
    +    }
    +  }
    +
    +  /**
    +   * Prepares the TableModel from the parsed table details.
    +   *
    +   * Side effect: the `schemaOrdinal` of every field is set to its position in `fields`.
    +   *
    +   * @param ifNotExistPresent passed through to the TableModel unchanged
    +   * @param dbName            database name; CarbonCommonConstants.DATABASE_DEFAULT_NAME
    +   *                          is used when it is empty
    +   * @param tableName         passed through to the TableModel unchanged
    +   * @param fields            all columns of the table
    +   * @param partitionCols     not read by this method
    +   * @param tableProperties   consulted for the dimension/measure split, column properties,
    +   *                          column groups, no-inverted-index columns and the block size
    +   * @return the populated TableModel
    +   * @throws MalformedCarbonCommandException if no dimension column results from `fields`
    +   */
    +  def prepareTableModel(ifNotExistPresent: Boolean,
    +      dbName: Option[String],
    +      tableName: String,
    +      fields: Seq[Field],
    +      partitionCols: Seq[PartitionerField],
    +      tableProperties: Map[String, String]): TableModel = {
    +    val databaseName = dbName.getOrElse(CarbonCommonConstants.DATABASE_DEFAULT_NAME)
    +
    +    fields.zipWithIndex.foreach { case (field, ordinal) =>
    +      field.schemaOrdinal = ordinal
    +    }
    +    val (dims: Seq[Field], noDictionaryDims: Seq[String]) = extractDimColsAndNoDictionaryFields(
    +      fields, tableProperties)
    +    if (dims.isEmpty) {
    +      throw new MalformedCarbonCommandException(
    +        s"Table $databaseName.$tableName can not be created without key columns. Please " +
    +        "use DICTIONARY_INCLUDE or DICTIONARY_EXCLUDE to set at least one key column " +
    +        "if all specified columns are numeric types")
    +    }
    +    val msrs: Seq[Field] = extractMsrColsFromFields(fields, tableProperties)
    +
    +    // column properties
    +    val colProps = extractColumnProperties(fields, tableProperties)
    +    // get column groups configuration from table properties.
    +    val groupCols: Seq[String] = updateColumnGroupsInField(tableProperties,
    +      noDictionaryDims, msrs, dims)
    +
    +    // get no inverted index columns from table properties.
    +    val noInvertedIdxCols = extractNoInvertedIndexColumns(fields, tableProperties)
    +
    +    // validate the tableBlockSize from table properties
    +    CommonUtil.validateTableBlockSize(tableProperties)
    +
    +    TableModel(
    +      ifNotExistPresent,
    +      databaseName,
    +      dbName,
    +      tableName,
    +      tableProperties,
    +      reorderDimensions(dims.map(f => normalizeType(f)).map(f => addParent(f))),
    +      msrs.map(f => normalizeType(f)),
    +      Option(noDictionaryDims),
    +      Option(noInvertedIdxCols),
    +      groupCols,
    +      Some(colProps))
    +  }
    +
    +  /**
    +   * Extracts the column groups configuration from the table properties.
    +   *
    +   * The property value looks like "(col1,col2),(col3,col4)". Every parenthesised group
    +   * is validated, rearranged via rearrangedColumnGroup and collected.
    +   *
    +   * @param tableProperties  read for CarbonCommonConstants.COLUMN_GROUPS
    +   * @param noDictionaryDims passed to CommonUtil.validateColumnGroup
    +   * @param msrs             passed to CommonUtil.validateColumnGroup
    +   * @param dims             dimensions used for validation and ordering
    +   * @return the result of CommonUtil.arrangeColGrpsInSchemaOrder, or null when the
    +   *         property is absent
    +   */
    +  protected def updateColumnGroupsInField(tableProperties: Map[String, String],
    +      noDictionaryDims: Seq[String],
    +      msrs: Seq[Field],
    +      dims: Seq[Field]): Seq[String] = {
    +    tableProperties.get(CarbonCommonConstants.COLUMN_GROUPS) match {
    +      case Some(nonSplitCols) =>
    +        var splittedColGrps: Seq[String] = Seq[String]()
    +        // split "(col1,col2),(col3,col4)" on the parentheses into
    +        // "col1,col2" and "col3,col4"
    +        val m: Matcher = Pattern.compile("\\(([^)]+)\\)").matcher(nonSplitCols)
    +        while (m.find()) {
    +          val oneGroup: String = m.group(1)
    +          // validation is given only the groups accepted so far
    +          CommonUtil.validateColumnGroup(oneGroup, noDictionaryDims, msrs, splittedColGrps,
    +            dims)
    +          splittedColGrps :+= rearrangedColumnGroup(oneGroup, dims)
    +        }
    +        // This will be further handled.
    +        CommonUtil.arrangeColGrpsInSchemaOrder(splittedColGrps, dims)
    +      case None =>
    +        // null (not an empty Seq) is kept on purpose so existing callers see no change
    +        null
    +    }
    +  }
    +
    +  /**
    +   * Rewrites one column group so that its columns appear in the order of `dims`.
    +   *
    +   * The former "Invalid column group:" error could never be raised, because the
    +   * contiguity check either threw its own error or returned true; it is removed.
    +   *
    +   * @param colGroup comma separated column names of one group
    +   * @param dims     dimensions; column names are matched case-insensitively
    +   * @return the matched dimension names, in `dims` order, joined with ","
    +   * @throws MalformedCarbonCommandException if the matched dimensions are not at
    +   *                                         consecutive positions in `dims`
    +   */
    +  def rearrangedColumnGroup(colGroup: String, dims: Seq[Field]): String = {
    +    val indexedDims = dims.zipWithIndex
    +    // positions in dims of every column named in the group, ascending
    +    val colGrpFieldIndx: Seq[Int] = colGroup.split(',').map(_.trim).toSeq.flatMap { name =>
    +      indexedDims.collect { case (dim, idx) if dim.column.equalsIgnoreCase(name) => idx }
    +    }.sorted
    +    // neighbours must differ by exactly one; drop(1) keeps the empty case safe
    +    val contiguous = colGrpFieldIndx.zip(colGrpFieldIndx.drop(1)).forall {
    +      case (prev, next) => next - prev == 1
    +    }
    +    if (!contiguous) {
    +      throw new MalformedCarbonCommandException(
    +        "Invalid column group,column in group should be contiguous as per schema.")
    +    }
    +    colGrpFieldIndx.map(idx => dims(idx).column).mkString(",")
    +  }
    +
    +  /**
    +   * Builds the Partitioner described by the partition columns and table properties.
    +   *
    +   * @param partitionCols   partition columns; their partitionColumn values become the
    +   *                        partitioner's column names
    +   * @param tableProperties read for CarbonCommonConstants.PARTITIONCLASS and
    +   *                        CarbonCommonConstants.PARTITIONCOUNT
    +   * @return Some(partitioner) when at least one partition column is given, else None
    +   */
    +  protected def getPartitionerObject(partitionCols: Seq[PartitionerField],
    +      tableProperties: Map[String, String]): Option[Partitioner] = {
    +    // empty partition class by default; later in table schema it is set to default value.
    +    val partitionClass: String =
    +      tableProperties.getOrElse(CarbonCommonConstants.PARTITIONCLASS, "")
    +    // falls back to 1 when the property is missing or cannot be read as an Int
    +    val partitionCount: Int = tableProperties.get(CarbonCommonConstants.PARTITIONCOUNT) match {
    +      case Some(count) =>
    +        try {
    +          count.toInt
    +        } catch {
    +          case e: Exception => 1
    +        }
    +      case None => 1
    +    }
    +    val partitionColNames: Array[String] = partitionCols.map(_.partitionColumn).toArray
    +    if (partitionColNames.isEmpty) {
    +      // partition cols are not given, so no need to do partition.
    +      None
    +    } else {
    +      Option(Partitioner(partitionClass, partitionColNames, partitionCount, null))
    +    }
    +  }
    +
    +  /**
    +   * Collects the column properties of all fields into a Java map.
    +   *
    +   * A field with non-null children is handled by fillAllChildrenColumnProperty,
    +   * any other field by fillColumnProperty.
    +   *
    +   * @param fields          fields to inspect
    +   * @param tableProperties table properties handed to the fill methods
    +   * @return the map filled by those methods
    +   */
    +  protected def extractColumnProperties(fields: Seq[Field],
    +      tableProperties: Map[String, String]):
    +  java.util.Map[String, java.util.List[ColumnProperty]] = {
    +    val colPropMap = new java.util.HashMap[String, java.util.List[ColumnProperty]]()
    +    fields.foreach { field =>
    +      if (field.children.exists(_ != null)) {
    +        fillAllChildrenColumnProperty(field.column, field.children, tableProperties,
    +          colPropMap)
    +      } else {
    +        fillColumnProperty(None, field.column, tableProperties, colPropMap)
    +      }
    +    }
    +    colPropMap
    +  }
    +
    +  /**
    +   * Calls fillColumnProperty for every child in `fieldChildren`, with `parent` as the
    +   * parent column name. Only the given children are visited, not their own children.
    +   */
    +  protected def fillAllChildrenColumnProperty(parent: String,
    +      fieldChildren: Option[List[Field]],
    +      tableProperties: Map[String, String],
    +      colPropMap: java.util.HashMap[String, java.util.List[ColumnProperty]]): Unit = {
    +    for {
    +      children <- fieldChildren
    +      child <- children
    +    } fillColumnProperty(Some(parent), child.column, tableProperties, colPropMap)
    +  }
    +
    +  /**
    +   * Looks up the properties of one column via CommonUtil.getColumnProperties and, when
    +   * some are found, stores them in `colPropMap` under the key computed by getKey.
    +   */
    +  protected def fillColumnProperty(parentColumnName: Option[String],
    +      columnName: String,
    +      tableProperties: Map[String, String],
    +      colPropMap: java.util.HashMap[String, java.util.List[ColumnProperty]]): Unit = {
    +    val (tblPropKey, colProKey) = getKey(parentColumnName, columnName)
    +    CommonUtil.getColumnProperties(tblPropKey, tableProperties).foreach { props =>
    +      colPropMap.put(colProKey, props)
    +    }
    +  }
    +
    +  /**
    +   * Computes the pair (table-property key, column-property key) for a column.
    +   *
    +   * Without a parent both keys are the column name. With a parent both keys are
    +   * "parent.column", except that a child named "val" uses the bare parent name as
    +   * its table-property key.
    +   */
    +  def getKey(parentColumnName: Option[String],
    +      columnName: String): (String, String) = {
    +    parentColumnName match {
    +      case None => (columnName, columnName)
    +      case Some(parent) =>
    +        val qualified = parent + "." + columnName
    +        val tablePropertyKey = if (columnName == "val") parent else qualified
    +        (tablePropertyKey, qualified)
    +    }
    +  }
    +
    +  /**
    +   * This will extract the no inverted columns fields.
    +   * By default all dimensions use inverted index.
    +   *
    +   * @param fields
    +   * @param tableProperties
    +   * @return
    +   */
    +  protected def extractNoInvertedIndexColumns(fields: Seq[Field],
    +      tableProperties: Map[String, String]):
    +  Seq[String] = {
    --- End diff --
    
    Move the return type (`Seq[String] = {`) up to the previous line.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message