spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From shiva...@apache.org
Subject spark git commit: [SPARK-12645][SPARKR] SparkR support hash function
Date Sat, 09 Jan 2016 07:00:41 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 a6190508b -> 8b5f23043


[SPARK-12645][SPARKR] SparkR support hash function

Add ```hash``` function for SparkR ```DataFrame```.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #10597 from yanboliang/spark-12645.

(cherry picked from commit 3d77cffec093bed4d330969f1a996f3358b9a772)
Signed-off-by: Shivaram Venkataraman <shivaram@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b5f2304
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b5f2304
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b5f2304

Branch: refs/heads/branch-1.6
Commit: 8b5f23043322254c725c703c618ba3d3cc4a4240
Parents: a619050
Author: Yanbo Liang <ybliang8@gmail.com>
Authored: Sat Jan 9 12:29:51 2016 +0530
Committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Committed: Sat Jan 9 12:30:29 2016 +0530

----------------------------------------------------------------------
 R/pkg/NAMESPACE                           |  1 +
 R/pkg/R/functions.R                       | 20 ++++++++++++++++++++
 R/pkg/R/generics.R                        |  4 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 4 files changed, 26 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8b5f2304/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index beacc39..34be7f0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -130,6 +130,7 @@ exportMethods("%in%",
               "count",
               "countDistinct",
               "crc32",
+              "hash",
               "cume_dist",
               "date_add",
               "date_format",

http://git-wip-us.apache.org/repos/asf/spark/blob/8b5f2304/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index df36bc8..9bb7876 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -340,6 +340,26 @@ setMethod("crc32",
             column(jc)
           })
 
+#' hash
+#'
+#' Calculates the hash code of given columns, and returns the result as a int column.
+#'
+#' @rdname hash
+#' @name hash
+#' @family misc_funcs
+#' @export
+#' @examples \dontrun{hash(df$c)}
+setMethod("hash",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function (x) {
+              stopifnot(class(x) == "Column")
+              x@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "hash", jcols)
+            column(jc)
+          })
+
 #' dayofmonth
 #'
 #' Extracts the day of the month as an integer from a given date/timestamp/string.

http://git-wip-us.apache.org/repos/asf/spark/blob/8b5f2304/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ba68617..5ba68e3 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -736,6 +736,10 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
 #' @export
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
+#' @rdname hash
+#' @export
+setGeneric("hash", function(x, ...) { standardGeneric("hash") })
+
 #' @rdname cume_dist
 #' @export
 setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })

http://git-wip-us.apache.org/repos/asf/spark/blob/8b5f2304/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 73f311e..e7c7eec 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -918,7 +918,7 @@ test_that("column functions", {
   c <- column("a")
   c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
   c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
-  c3 <- cosh(c) + count(c) + crc32(c) + exp(c)
+  c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
   c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
   c5 <- hour(c) + initcap(c) + last(c) + last_day(c) + length(c)
   c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message