spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kevinyu98 <...@git.apache.org>
Subject [GitHub] spark pull request #12646: [SPARK-14878][SQL] Trim characters string functio...
Date Tue, 05 Sep 2017 21:16:49 GMT
Github user kevinyu98 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12646#discussion_r137116856
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---
    @@ -503,69 +504,319 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
       override def prettyName: String = "find_in_set"
     }
     
    +trait String2TrimExpression extends Expression with ImplicitCastInputTypes {
    +
    +  override def dataType: DataType = StringType
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
    +
    +  override def sql: String = {
    +    if (children.size == 1) {
    +      val childrenSQL = children.map(_.sql).mkString(", ")
    +      s"$prettyName($childrenSQL)"
    +    } else {
    +      val trimSQL = children(0).map(_.sql).mkString(", ")
    +      val tarSQL = children(1).map(_.sql).mkString(", ")
    +      s"$prettyName($trimSQL, $tarSQL)"
    +    }
    +  }
    +}
    +
    +object StringTrim {
    +  def apply(str: Expression, trimStr: Expression) : StringTrim = StringTrim(str, Some(trimStr))
    +  def apply(str: Expression) : StringTrim = StringTrim(str, None)
    +}
    +
     /**
    - * A function that trim the spaces from both ends for the specified string.
    - */
    + * A function that takes a character string, removes the leading and trailing characters matching with the characters
    + * in the trim string, returns the new string.
    + * If BOTH and trimStr keywords are not specified, it defaults to remove space character from both ends. The trim
    + * function will have one argument, which contains the source string.
    + * If BOTH and trimStr keywords are specified, it trims the characters from both ends, and the trim function will have
    + * two arguments, the first argument contains trimStr, the second argument contains the source string.
    + * trimStr: A character string to be trimmed from the source string, if it has multiple characters, the function
    + * searches for each character in the source string, removes the characters from the source string until it
    + * encounters the first non-match character.
    + * BOTH: removes any characters from both ends of the source string that matches characters in the trim string.
    +  */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters from `str`.",
    +  usage = """
    +    _FUNC_(str) - Removes the leading and trailing space characters from `str`.
    +    _FUNC_(BOTH trimStr FROM str) - Remove the leading and trailing trimString from `str`
    +  """,
    +  arguments = """
    +    Arguments:
    +      * str - a string expression
    +      * trimString - the trim string
    +      * BOTH, FROM - these are keyword to specify for trim string from both ends of the string
    +  """,
       examples = """
         Examples:
           > SELECT _FUNC_('    SparkSQL   ');
            SparkSQL
    +      > SELECT _FUNC_(BOTH 'SL' FROM 'SSparkSQLS');
    +       parkSQ
       """)
    -case class StringTrim(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +case class StringTrim(
    +    srcStr: Expression,
    +    trimStr: Option[Expression] = None)
    +  extends String2TrimExpression {
     
    -  def convert(v: UTF8String): UTF8String = v.trim()
    +  def this (trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr))
    +
    +  def this(srcStr: Expression) = this(srcStr, None)
     
       override def prettyName: String = "trim"
     
    +  override def children: Seq[Expression] = if (trimStr.isDefined) {
    +    srcStr :: trimStr.get :: Nil
    +  } else {
    +    srcStr :: Nil
    +  }
    +  override def eval(input: InternalRow): Any = {
    +    val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
    +    if (srcString != null) {
    +      if (trimStr.isDefined) {
    +        return srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String])
    +      } else {
    +        return srcString.trim()
    +      }
    +    }
    +    null
    +  }
    +
       override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trim()")
    +    val evals = children.map(_.genCode(ctx))
    +    val srcString = evals(0)
    +
    +    if (evals.length == 1) {
    +      ev.copy(evals.map(_.code).mkString("\n") + s"""
    +        boolean ${ev.isNull} = false;
    +        UTF8String ${ev.value} = null;
    +        if (${srcString.isNull}) {
    +          ${ev.isNull} = true;
    +        } else {
    +          ${ev.value} = ${srcString.value}.trim();
    +        }
    +         """.stripMargin)
    +    } else {
    +      val trimString = evals(1)
    +      val getTrimFunction =
    +        s"""
    +        if (${trimString.isNull}) {
    +          ${ev.isNull} = true;
    +        } else {
    +          ${ev.value} = ${srcString.value}.trim(${trimString.value});
    +      }""".stripMargin
    --- End diff --
    
    will change.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message