spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wzhfy <...@git.apache.org>
Subject [GitHub] spark pull request #12646: [SPARK-14878][SQL] Trim characters string functio...
Date Tue, 16 May 2017 05:14:59 GMT
Github user wzhfy commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12646#discussion_r116656332
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
---
    @@ -461,68 +462,249 @@ case class FindInSet(left: Expression, right: Expression) extends
BinaryExpressi
     }
     
     /**
    - * A function that trim the spaces from both ends for the specified string.
    + * A function that trim the spaces or a trim string from both ends for the specified
string.
      */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters from `str`.",
    +  usage = """
    +    _FUNC_(str) - Removes the leading and trailing space characters from `str`.
    +    _FUNC_(BOTH trimString FROM str) - Remove the leading and trailing trimString from
`str`
    +    _FUNC_(LEADING trimChar FROM str) - Remove the leading trimString from `str`
    +    _FUNC_(TRAILING trimChar FROM str) - Remove the trailing trimString from `str`
    +  """,
       extended = """
    +    Arguments:
    +      str - a string expression
    +      trimString - the trim string
    +      BOTH, FROM - these are keyword to specify for trim string from both side of the
string
    +      LEADING, FROM - these are keyword to specify for trim string from left side of
the string
    +      TRAILING, FROM - these are keyword to specify for trim string from right side of
the string
         Examples:
           > SELECT _FUNC_('    SparkSQL   ');
            SparkSQL
    +      > SELECT _FUNC_(BOTH 'SL' FROM 'SSparkSQLS');
    +       parkSQ
    +      > SELECT _FUNC_(LEADING 'paS' FROM 'SSparkSQLS');
    +       rkSQLS
    +      > SELECT _FUNC_(TRAILING 'SLQ' FROM 'SSparkSQLS');
    +       SSparkS
       """)
    -case class StringTrim(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +case class StringTrim(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
     
    -  def convert(v: UTF8String): UTF8String = v.trim()
    +  require(children.size <= 2 && children.nonEmpty,
    +    s"$prettyName requires at least one argument and no more than two.")
    +
    +  override def dataType: DataType = StringType
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "trim"
     
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trim()
    +      } else if (inputs(1) != null) {
    +        return inputs(1).trim(inputs(0))
    +      }
    +    }
    +    null
    +  }
    +
       override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trim()")
    +    if (children.size == 2 && ! children(0).isInstanceOf[Literal]) {
    +      throw new AnalysisException(s"The trimming parameter should be Literal.")}
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimFunction = if (children.size == 1) {
    +      s"""UTF8String ${ev.value} = ${inputs(0)}.trim();"""
    +    } else {
    +      s"""UTF8String ${ev.value} = ${inputs(1)}.trim(${inputs(0)});""".stripMargin
    +    }
    +    ev.copy(evals.map(_.code).mkString("\n") +
    +      s"""
    +    boolean ${ev.isNull} = false;
    +    ${getTrimFunction};
    +    if (${ev.value} == null) {
    +      ${ev.isNull} = true;
    +    }
    +    """)
    +    }
    +
    +  override def sql: String = {
    +    if (children.size == 1) {
    +      val childrenSQL = children.map(_.sql).mkString(", ")
    +      s"$prettyName($childrenSQL)"
    +    } else {
    +      val trimSQL = children(0).map(_.sql).mkString(", ")
    +      val tarSQL = children(1).map(_.sql).mkString(", ")
    +      s"$prettyName($trimSQL, $tarSQL)"
    +    }
       }
     }
     
     /**
    - * A function that trim the spaces from left end for given string.
    + * A function that trim the spaces or a trim string from left end for given string.
      */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters from `str`.",
    +  usage = """
    +    _FUNC_(str) - Removes the leading space characters from `str`.
    +    _FUNC_(trimStr, str) - Removes the leading string contains the characters from the
trim string from the `str`
    +  """,
       extended = """
    +    Arguments:
    +      str - a string expression
    +      trimStr - the trim string
         Examples:
    -      > SELECT _FUNC_('    SparkSQL');
    +      > SELECT _FUNC_('    SparkSQL   ');
            SparkSQL
    +      > SELECT _FUNC_('Sp', 'SSparkSQLS');
    +       arkSQLS
       """)
    -case class StringTrimLeft(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +case class StringTrimLeft(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
    +
    +  require (children.size <= 2 && children.nonEmpty,
    +    "$prettyName requires at least one argument and no more than two.")
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
    +  override def dataType: DataType = StringType
     
    -  def convert(v: UTF8String): UTF8String = v.trimLeft()
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "ltrim"
     
    -  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trimLeft()")
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trimLeft()
    +      } else if (inputs(1) != null) {
    +        return inputs(1).trimLeft(inputs(0))
    +      }
    +    }
    +    null
    +  }
    +
    +  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    if (children.size == 2 && ! children(0).isInstanceOf[Literal]) {
    +      throw new AnalysisException(s"The trimming parameter should be Literal.")}
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimLeftFunction = if (children.size == 1) {
    +      s"""UTF8String ${ev.value} = ${inputs(0)}.trimLeft();"""
    --- End diff --
    
    Only one `"` is needed here: this is a single-line string, so a plain `s"..."` is enough and the triple quotes are unnecessary.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message