Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id C5F26200D6A for ; Sat, 30 Dec 2017 18:50:09 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id C45B7160C22; Sat, 30 Dec 2017 17:50:09 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 16BDD160C0C for ; Sat, 30 Dec 2017 18:50:08 +0100 (CET) Received: (qmail 8938 invoked by uid 500); 30 Dec 2017 17:50:08 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 8929 invoked by uid 99); 30 Dec 2017 17:50:07 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 30 Dec 2017 17:50:07 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id EEB99DFCFA; Sat, 30 Dec 2017 17:50:05 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: gurwls223@apache.org To: commits@spark.apache.org Message-Id: <39961bacf7f74a7b9337d6aa2c187910@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-22924][SPARKR] R API for sortWithinPartitions Date: Sat, 30 Dec 2017 17:50:05 +0000 (UTC) archived-at: Sat, 30 Dec 2017 17:50:10 -0000 Repository: spark Updated Branches: refs/heads/master fd7d141d8 -> ea0a5eef2 [SPARK-22924][SPARKR] R API for sortWithinPartitions ## What changes were proposed in this pull request? Add to `arrange` the option to sort only within partition ## How was this patch tested? manual, unit tests Author: Felix Cheung Closes #20118 from felixcheung/rsortwithinpartition. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea0a5eef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea0a5eef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea0a5eef Branch: refs/heads/master Commit: ea0a5eef2238daa68a15b60a6f1a74c361216140 Parents: fd7d141 Author: Felix Cheung Authored: Sun Dec 31 02:50:00 2017 +0900 Committer: hyukjinkwon Committed: Sun Dec 31 02:50:00 2017 +0900 ---------------------------------------------------------------------- R/pkg/R/DataFrame.R | 14 ++++++++++---- R/pkg/tests/fulltests/test_sparkSQL.R | 5 +++++ 2 files changed, 15 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ea0a5eef/R/pkg/R/DataFrame.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ace49da..fe238f6 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2297,6 +2297,7 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' @param ... additional sorting fields #' @param decreasing a logical argument indicating sorting order for columns when #' a character vector is specified for col +#' @param withinPartitions a logical argument indicating whether to sort only within each partition #' @return A SparkDataFrame where all elements are sorted. #' @family SparkDataFrame functions #' @aliases arrange,SparkDataFrame,Column-method @@ -2312,16 +2313,21 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' arrange(df, asc(df$col1), desc(abs(df$col2))) #' arrange(df, "col1", decreasing = TRUE) #' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE)) +#' arrange(df, "col1", "col2", withinPartitions = TRUE) #' } #' @note arrange(SparkDataFrame, Column) since 1.4.0 setMethod("arrange", signature(x = "SparkDataFrame", col = "Column"), - function(x, col, ...) { + function(x, col, ..., withinPartitions = FALSE) { jcols <- lapply(list(col, ...), function(c) { c@jc }) - sdf <- callJMethod(x@sdf, "sort", jcols) + if (withinPartitions) { + sdf <- callJMethod(x@sdf, "sortWithinPartitions", jcols) + } else { + sdf <- callJMethod(x@sdf, "sort", jcols) + } dataFrame(sdf) }) @@ -2332,7 +2338,7 @@ setMethod("arrange", #' @note arrange(SparkDataFrame, character) since 1.4.0 setMethod("arrange", signature(x = "SparkDataFrame", col = "character"), - function(x, col, ..., decreasing = FALSE) { + function(x, col, ..., decreasing = FALSE, withinPartitions = FALSE) { # all sorting columns by <- list(col, ...) @@ -2356,7 +2362,7 @@ setMethod("arrange", } }) - do.call("arrange", c(x, jcols)) + do.call("arrange", c(x, jcols, withinPartitions = withinPartitions)) }) #' @rdname arrange http://git-wip-us.apache.org/repos/asf/spark/blob/ea0a5eef/R/pkg/tests/fulltests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 1b7d53f..5197838 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2130,6 +2130,11 @@ test_that("arrange() and orderBy() on a DataFrame", { sorted7 <- arrange(df, "name", decreasing = FALSE) expect_equal(collect(sorted7)[2, "age"], 19) + + df <- createDataFrame(cars, numPartitions = 10) + expect_equal(getNumPartitions(df), 10) + sorted8 <- arrange(df, "dist", withinPartitions = TRUE) + expect_equal(collect(sorted8)[5:6, "dist"], c(22, 10)) }) test_that("filter() on a DataFrame", { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org