spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kevinyu98 <...@git.apache.org>
Subject [GitHub] spark pull request #12646: [SPARK-14878][SQL] Trim characters string functio...
Date Wed, 31 Aug 2016 23:57:11 GMT
Github user kevinyu98 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12646#discussion_r77094689
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---
    @@ -431,56 +432,233 @@ case class FindInSet(left: Expression, right: Expression) extends
BinaryExpressi
     }
     
     /**
    - * A function that trim the spaces from both ends for the specified string.
    + * A function that trim the spaces or a character from both ends for the specified string.
      */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters from str.",
    -  extended = "> SELECT _FUNC_('    SparkSQL   ');\n 'SparkSQL'")
    -case class StringTrim(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +  usage = "_FUNC_(str) - Removes the leading and trailing space characters or char from str.",
    +  extended = "> SELECT _FUNC_('    SparkSQL   ');\n 'SparkSQL'\n" +
    +             "> SELECT _FUNC_('S', 'SSparkSQLS');\n 'parkSQL'\n" +
    +             "> SELECT _FUNC_(BOTH 'S' FROM 'SSparkSQLS');\n 'parkSQL'\n" +
    +             "> SELECT _FUNC_(LEADING 'S' FROM 'SSparkSQLS');\n 'parkSQLS'\n" +
    +             "> SELECT _FUNC_(TRAILING 'S' FROM 'SSparkSQLS');\n 'SSparkSQL'")
    +case class StringTrim(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
    +
    +  require (children.size <= 2 && children.nonEmpty,
    +    "$prettyName requires at least one argument and no more than two.")
    +
    +  override def dataType: DataType = StringType
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
     
    -  def convert(v: UTF8String): UTF8String = v.trim()
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "trim"
     
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trim()
    +      } else if (inputs(1) != null) {
    +        if (inputs(0).numChars > 1) {
    +          throw new AnalysisException(s"Trim character '${inputs(0)}' can not be greater than " +
    +            s"1 character.")
    +        } else {
    +          return inputs(1).trim(inputs(0))
    +        }
    +      }
    +    }
    +    null
    +  }
    +
       override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trim()")
    +    if (children.size == 2 &&
    +       (! children(0).isInstanceOf[Literal] || children(0).toString.length > 1)) {
    +      throw new AnalysisException(s"The trimming parameter should be Literal " +
    +        s"and only one character.") }
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimFunction = if (children.size == 1) {
    +      s"""UTF8String ${ev.value} = ${inputs(0)}.trim();"""
    +    } else {
    +      s"""UTF8String ${ev.value} = ${inputs(1)}.trim(${inputs(0)});""".stripMargin
    +    }
    +    ev.copy(evals.map(_.code).mkString("\n") +
    +    s"""
    +    boolean ${ev.isNull} = false;
    +    ${getTrimFunction};
    +    if (${ev.value} == null) {
    +      ${ev.isNull} = true;
    +    }
    +    """)
    +  }
    +
    +  override def sql: String = {
    +    if (children.size == 1) {
    +      val childrenSQL = children.map(_.sql).mkString(", ")
    +      s"$prettyName($childrenSQL)"
    +    } else {
    +      val trimSQL = children(0).map(_.sql).mkString(", ")
    +      val tarSQL = children(1).map(_.sql).mkString(", ")
    +      s"$prettyName($trimSQL, $tarSQL)"
    +    }
       }
     }
     
     /**
    - * A function that trim the spaces from left end for given string.
    + * A function that trim the spaces or a character from left end for given string.
      */
     @ExpressionDescription(
       usage = "_FUNC_(str) - Removes the leading space characters from str.",
    -  extended = "> SELECT _FUNC_('    SparkSQL   ');\n 'SparkSQL   '")
    -case class StringTrimLeft(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +  extended = "> SELECT _FUNC_('    SparkSQL   ');\n 'SparkSQL   '\n" +
    +             "> SELECT _FUNC_('S', 'SSparkSQLS');\n 'parkSQLS'\n" +
    +             "> SELECT _FUNC_(LEADING 'S' FROM 'SSparkSQLS');\n 'parkSQLS'")
    +case class StringTrimLeft(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
    +
    +  require (children.size <= 2 && children.nonEmpty,
    +    "$prettyName requires at least one argument and no more than two.")
     
    -  def convert(v: UTF8String): UTF8String = v.trimLeft()
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
    +  override def dataType: DataType = StringType
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "ltrim"
     
    -  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trimLeft()")
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trimLeft()
    +      } else if (inputs(1) != null) {
    +        if (inputs(0).numChars > 1) {
    +          throw new AnalysisException(s"Trim character '${inputs(0)}' can not be greater than" +
    +            s" 1 character.")
    +        } else {
    +          return inputs(1).trimLeft(inputs(0))
    +        }
    +      }
    +    }
    +    null
    +  }
    +
    +    override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +    if (children.size == 2 &&
    +       (! children(0).isInstanceOf[Literal] || children(0).toString.length > 1)) {
    +      throw new AnalysisException(s"The trimming parameter should be Literal " +
    +        s"and only one character.") }
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimLeftFunction = if (children.size == 1) {
    +      s"""UTF8String ${ev.value} = ${inputs(0)}.trimLeft();"""
    +    } else {
    +      s"""UTF8String ${ev.value} = ${inputs(1)}.trimLeft(${inputs(0)});"""
    +    }
    +
    +    ev.copy(evals.map(_.code).mkString("\n") +
    +      s"""
    +    boolean ${ev.isNull} = false;
    +    ${getTrimLeftFunction};
    +    if (${ev.value} == null) {
    +      ${ev.isNull} = true;
    +    }
    +    """)
    +  }
    +
    +  override def sql: String = {
    +    if (children.size == 1) {
    +      val childrenSQL = children.map(_.sql).mkString(", ")
    +      s"$prettyName($childrenSQL)"
    +    } else {
    +      val trimSQL = children(0).map(_.sql).mkString(", ")
    +      val tarSQL = children(1).map(_.sql).mkString(", ")
    +      s"$prettyName($trimSQL, $tarSQL)"
    +    }
       }
     }
     
     /**
    - * A function that trim the spaces from right end for given string.
    + * A function that trim the spaces or a character from right end for given string.
      */
     @ExpressionDescription(
       usage = "_FUNC_(str) - Removes the trailing space characters from str.",
    -  extended = "> SELECT _FUNC_('    SparkSQL   ');\n '    SparkSQL'")
    -case class StringTrimRight(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +  extended = "> SELECT _FUNC_('    SparkSQL   ');\n '    SparkSQL'\n" +
    +             "> SELECT _FUNC_('S', 'SSparkSQLS');\n 'SSparkSQL'\n" +
    +             "> SELECT _FUNC_(TRAILING 'S' FROM 'SSparkSQLS');\n 'SSparkSQL'")
    +case class StringTrimRight(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
    +
    +  require (children.size <= 2 && children.nonEmpty,
    +    "$prettyName requires at least one argument and no more than two.")
     
    -  def convert(v: UTF8String): UTF8String = v.trimRight()
    +  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
    +  override def dataType: DataType = StringType
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "rtrim"
     
    -  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trimRight()")
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trimRight()
    +      } else if (inputs(1) != null) {
    +        if (inputs(0).numChars > 1) {
    --- End diff --
    
    I put it there initially, but then it didn't cover the code path from the DataFrame API; that is why I put it here, to catch all the cases. What do you think? Thanks.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message