Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 500501847A for ; Wed, 19 Aug 2015 03:31:11 +0000 (UTC) Received: (qmail 33662 invoked by uid 500); 19 Aug 2015 03:31:11 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 33634 invoked by uid 500); 19 Aug 2015 03:31:11 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 33625 invoked by uid 99); 19 Aug 2015 03:31:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 19 Aug 2015 03:31:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E1CC4E08EA; Wed, 19 Aug 2015 03:31:10 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: shivaram@apache.org To: commits@spark.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR Date: Wed, 19 Aug 2015 03:31:10 +0000 (UTC) Repository: spark Updated Branches: refs/heads/branch-1.5 bb2fb59f9 -> ebaeb1892 [SPARK-10075] [SPARKR] Add `when` expression function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `when` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange for shivaram, I can remove it. What do you think? 
### JIRA [[SPARK-10075] Add `when` expression function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA Closes #8266 from yu-iskw/SPARK-10075. (cherry picked from commit bf32c1f7f47dd907d787469f979c5859e02ce5e6) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebaeb189 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebaeb189 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebaeb189 Branch: refs/heads/branch-1.5 Commit: ebaeb189260dd338fc5a91d8ec3ff6d45989991a Parents: bb2fb59 Author: Yu ISHIKAWA Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman Committed: Tue Aug 18 20:29:34 2015 -0700 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++++++++++++++ R/pkg/R/functions.R | 14 ++++++++++++++ R/pkg/R/generics.R | 8 ++++++++ R/pkg/inst/tests/test_sparkSQL.R | 7 +++++++ 5 files changed, 45 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods("abs", "n_distinct", "nanvl", "negate", + "otherwise", "pmod", "quarter", "reverse", @@ -182,6 +183,7 @@ exportMethods("abs", "unhex", "upper", "weekofyear", + "when", "year") exportClasses("GroupedData") http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/column.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod("%in%", jc <- callJMethod(x@jc, "in", table) 
return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod("otherwise", + signature(x = "Column", value = "ANY"), + function(x, value) { + value <- ifelse(class(value) == "Column", value@jc, value) + jc <- callJMethod(x@jc, "otherwise", value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod("n", signature(x = "Column"), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. +#' +#' @rdname column +setMethod("when", signature(condition = "Column", value = "ANY"), + function(condition, value) { + condition <- condition@jc + value <- ifelse(class(value) == "Column", value@jc, value) + jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @export setGeneric("startsWith", function(x, ...) 
{ standardGeneric("startsWith") }) +#' @rdname column +#' @export +setGeneric("when", function(condition, value) { standardGeneric("when") }) + +#' @rdname column +#' @export +setGeneric("otherwise", function(x, value) { standardGeneric("otherwise") }) + ###################### Expression Function Methods ########################## http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/inst/tests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 83caba8..841de65 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -727,6 +727,13 @@ test_that("greatest() and least() on a DataFrame", { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) +test_that("when() and otherwise() on a DataFrame", { + l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) + df <- createDataFrame(sqlContext, l) + expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, 1)))[, 1], c(NA, 1)) + expect_equal(collect(select(df, otherwise(when(df$a > 1, 1), 0)))[, 1], c(0, 1)) +}) + test_that("group by", { df <- jsonFile(sqlContext, jsonPath) df1 <- agg(df, name = "max", age = "sum") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org