spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [spark] cloud-fan commented on a change in pull request #29891: [SPARK-30796][SQL] Add parameter position for REGEXP_REPLACE
Date Fri, 16 Oct 2020 03:34:46 GMT

cloud-fan commented on a change in pull request #29891:
URL: https://github.com/apache/spark/pull/29891#discussion_r506027018



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
##########
@@ -318,16 +320,46 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression)
  */
 // scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(str, regexp, rep) - Replaces all substrings of `str` that match `regexp`
with `rep`.",
+  usage = "_FUNC_(str, regexp, rep[, position]) - Replaces all substrings of `str` that match
`regexp` with `rep`.",
+  arguments = """
+    Arguments:
+      * str - a string expression to search for a regular expression pattern match.
+      * regexp - a string representing a regular expression. The regex string should be a
+          Java regular expression.
+
+          Since Spark 2.0, string literals (including regex patterns) are unescaped in our
SQL
+          parser. For example, to match "\abc", a regular expression for `regexp` can be
+          "^\\abc$".
+
+          There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used
to
+          fallback to the Spark 1.6 behavior regarding string literal parsing. For example,
+          if the config is enabled, the `regexp` that can match "\abc" is "^\abc$".
+      * rep - a string expression to replace matched substrings.
+      * position - a positive integer expression that indicates the position within `str`
to begin searching.
+          The default is 1. If position is greater than the number of characters in `str`,
the result is `str`.
+  """,
   examples = """
     Examples:
       > SELECT _FUNC_('100-200', '(\\d+)', 'num');
        num-num
   """,
   since = "1.5.0")
 // scalastyle:on line.size.limit
-case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
-  extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {
+case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression, pos: Expression)
+  extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant {
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    if (!pos.foldable) {
+      return TypeCheckFailure(s"Position expression must be foldable, but got $pos")
+    }
+
+    val i = pos.eval().asInstanceOf[Int]

Review comment:
       can we test it and implement it correctly regarding null position?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message