spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From shiva...@apache.org
Subject spark git commit: [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
Date Tue, 23 Jun 2015 03:55:43 GMT
Repository: spark
Updated Branches:
  refs/heads/master c4d234396 -> 44fa7df64


[SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files

[[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548)

- This is the result of `lint-r`
    https://gist.github.com/yu-iskw/0019b37a2c1167f33986

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits:

0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44fa7df6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44fa7df6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44fa7df6

Branch: refs/heads/master
Commit: 44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3
Parents: c4d2343
Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>
Authored: Mon Jun 22 20:55:38 2015 -0700
Committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Committed: Mon Jun 22 20:55:38 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R                     | 96 ++++++++++++++--------------
 R/pkg/R/RDD.R                           | 48 +++++++-------
 R/pkg/R/SQLContext.R                    | 14 ++--
 R/pkg/R/broadcast.R                     |  6 +-
 R/pkg/R/deserialize.R                   |  2 +-
 R/pkg/R/generics.R                      | 15 ++---
 R/pkg/R/group.R                         |  1 -
 R/pkg/R/jobj.R                          |  2 +-
 R/pkg/R/pairRDD.R                       |  4 +-
 R/pkg/R/schema.R                        |  2 +-
 R/pkg/R/serialize.R                     |  2 +-
 R/pkg/R/sparkR.R                        |  6 +-
 R/pkg/R/utils.R                         | 48 +++++++-------
 R/pkg/R/zzz.R                           |  1 -
 R/pkg/inst/tests/test_binaryFile.R      |  7 +-
 R/pkg/inst/tests/test_binary_function.R | 28 ++++----
 R/pkg/inst/tests/test_rdd.R             | 12 ++--
 R/pkg/inst/tests/test_shuffle.R         | 28 ++++----
 R/pkg/inst/tests/test_sparkSQL.R        | 28 ++++----
 R/pkg/inst/tests/test_take.R            |  1 -
 R/pkg/inst/tests/test_textFile.R        |  7 +-
 R/pkg/inst/tests/test_utils.R           | 12 ++--
 22 files changed, 182 insertions(+), 188 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0af5cb8..6feabf4 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -38,7 +38,7 @@ setClass("DataFrame",
 setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
   .Object@env <- new.env()
   .Object@env$isCached <- isCached
-  
+
   .Object@sdf <- sdf
   .Object
 })
@@ -55,11 +55,11 @@ dataFrame <- function(sdf, isCached = FALSE) {
 ############################ DataFrame Methods ##############################################
 
 #' Print Schema of a DataFrame
-#' 
+#'
 #' Prints out the schema in tree format
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname printSchema
 #' @export
 #' @examples
@@ -78,11 +78,11 @@ setMethod("printSchema",
           })
 
 #' Get schema object
-#' 
+#'
 #' Returns the schema of this DataFrame as a structType object.
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname schema
 #' @export
 #' @examples
@@ -100,9 +100,9 @@ setMethod("schema",
           })
 
 #' Explain
-#' 
+#'
 #' Print the logical and physical Catalyst plans to the console for debugging.
-#' 
+#'
 #' @param x A SparkSQL DataFrame
 #' @param extended Logical. If extended is False, explain() only prints the physical plan.
 #' @rdname explain
@@ -200,11 +200,11 @@ setMethod("show", "DataFrame",
           })
 
 #' DataTypes
-#' 
+#'
 #' Return all column names and their data types as a list
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname dtypes
 #' @export
 #' @examples
@@ -224,11 +224,11 @@ setMethod("dtypes",
           })
 
 #' Column names
-#' 
+#'
 #' Return all column names as a list
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname columns
 #' @export
 #' @examples
@@ -256,12 +256,12 @@ setMethod("names",
           })
 
 #' Register Temporary Table
-#' 
+#'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
-#' 
+#'
 #' @param x A SparkSQL DataFrame
 #' @param tableName A character vector containing the name of the table
-#' 
+#'
 #' @rdname registerTempTable
 #' @export
 #' @examples
@@ -306,11 +306,11 @@ setMethod("insertInto",
           })
 
 #' Cache
-#' 
+#'
 #' Persist with the default storage level (MEMORY_ONLY).
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname cache-methods
 #' @export
 #' @examples
@@ -400,7 +400,7 @@ setMethod("repartition",
           signature(x = "DataFrame", numPartitions = "numeric"),
           function(x, numPartitions) {
             sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
-            dataFrame(sdf)     
+            dataFrame(sdf)
           })
 
 # toJSON
@@ -489,7 +489,7 @@ setMethod("distinct",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
-#' collect(sample(df, FALSE, 0.5)) 
+#' collect(sample(df, FALSE, 0.5))
 #' collect(sample(df, TRUE, 0.5))
 #'}
 setMethod("sample",
@@ -513,11 +513,11 @@ setMethod("sample_frac",
           })
 
 #' Count
-#' 
+#'
 #' Returns the number of rows in a DataFrame
-#' 
+#'
 #' @param x A SparkSQL DataFrame
-#' 
+#'
 #' @rdname count
 #' @export
 #' @examples
@@ -568,13 +568,13 @@ setMethod("collect",
           })
 
 #' Limit
-#' 
+#'
 #' Limit the resulting DataFrame to the number of rows specified.
-#' 
+#'
 #' @param x A SparkSQL DataFrame
 #' @param num The number of rows to return
 #' @return A new DataFrame containing the number of rows specified.
-#' 
+#'
 #' @rdname limit
 #' @export
 #' @examples
@@ -593,7 +593,7 @@ setMethod("limit",
           })
 
 #' Take the first NUM rows of a DataFrame and return a the results as a data.frame
-#' 
+#'
 #' @rdname take
 #' @export
 #' @examples
@@ -613,8 +613,8 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL, 
-#' then head() returns the first 6 rows in keeping with the current data.frame 
+#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
+#' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
 #' @param x A SparkSQL DataFrame
@@ -659,11 +659,11 @@ setMethod("first",
           })
 
 # toRDD()
-# 
+#
 # Converts a Spark DataFrame to an RDD while preserving column names.
-# 
+#
 # @param x A Spark DataFrame
-# 
+#
 # @rdname DataFrame
 # @export
 # @examples
@@ -1167,7 +1167,7 @@ setMethod("where",
 #'
 #' @param x A Spark DataFrame
 #' @param y A Spark DataFrame
-#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a 
+#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
 #' Column expression. If joinExpr is omitted, join() wil perform a Cartesian join
 #' @param joinType The type of join to perform. The following join types are available:
 #' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
@@ -1303,7 +1303,7 @@ setMethod("except",
 #' @param source A name for external data source
 #' @param mode One of 'append', 'overwrite', 'error', 'ignore'
 #'
-#' @rdname write.df 
+#' @rdname write.df
 #' @export
 #' @examples
 #'\dontrun{
@@ -1401,7 +1401,7 @@ setMethod("saveAsTable",
 #' @param col A string of name
 #' @param ... Additional expressions
 #' @return A DataFrame
-#' @rdname describe 
+#' @rdname describe
 #' @export
 #' @examples
 #'\dontrun{
@@ -1444,7 +1444,7 @@ setMethod("describe",
 #'                    This overwrites the how parameter.
 #' @param cols Optional list of column names to consider.
 #' @return A DataFrame
-#' 
+#'
 #' @rdname nafunctions
 #' @export
 #' @examples
@@ -1465,7 +1465,7 @@ setMethod("dropna",
             if (is.null(minNonNulls)) {
               minNonNulls <- if (how == "any") { length(cols) } else { 1 }
             }
-            
+
             naFunctions <- callJMethod(x@sdf, "na")
             sdf <- callJMethod(naFunctions, "drop",
                                as.integer(minNonNulls), listToSeq(as.list(cols)))
@@ -1488,16 +1488,16 @@ setMethod("na.omit",
 #' @param value Value to replace null values with.
 #'              Should be an integer, numeric, character or named list.
 #'              If the value is a named list, then cols is ignored and
-#'              value must be a mapping from column name (character) to 
+#'              value must be a mapping from column name (character) to
 #'              replacement value. The replacement value must be an
 #'              integer, numeric or character.
 #' @param cols optional list of column names to consider.
 #'             Columns specified in cols that do not have matching data
-#'             type are ignored. For example, if value is a character, and 
+#'             type are ignored. For example, if value is a character, and
 #'             subset contains a non-character column, then the non-character
 #'             column is simply ignored.
 #' @return A DataFrame
-#' 
+#'
 #' @rdname nafunctions
 #' @export
 #' @examples
@@ -1515,14 +1515,14 @@ setMethod("fillna",
             if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
               stop("value should be an integer, numeric, charactor or named list.")
             }
-            
+
             if (class(value) == "list") {
               # Check column names in the named list
               colNames <- names(value)
               if (length(colNames) == 0 || !all(colNames != "")) {
                 stop("value should be an a named list with each name being a column name.")
               }
-              
+
               # Convert to the named list to an environment to be passed to JVM
               valueMap <- new.env()
               for (col in colNames) {
@@ -1533,19 +1533,19 @@ setMethod("fillna",
                 }
                 valueMap[[col]] <- v
               }
-              
+
               # When value is a named list, caller is expected not to pass in cols
               if (!is.null(cols)) {
                 warning("When value is a named list, cols is ignored!")
                 cols <- NULL
               }
-              
+
               value <- valueMap
             } else if (is.integer(value)) {
               # Cast an integer to a numeric
               value <- as.numeric(value)
             }
-            
+
             naFunctions <- callJMethod(x@sdf, "na")
             sdf <- if (length(cols) == 0) {
               callJMethod(naFunctions, "fill", value)

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 0513299..8951114 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -48,7 +48,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
   # byte: The RDD stores data serialized in R.
   # string: The RDD stores data as strings.
   # row: The RDD stores the serialized rows of a DataFrame.
-  
+
   # We use an environment to store mutable states inside an RDD object.
   # Note that R's call-by-value semantics makes modifying slots inside an
   # object (passed as an argument into a function, such as cache()) difficult:
@@ -363,7 +363,7 @@ setMethod("collectPartition",
 
 # @description
 # \code{collectAsMap} returns a named list as a map that contains all of the elements
-# in a key-value pair RDD. 
+# in a key-value pair RDD.
 # @examples
 #\dontrun{
 # sc <- sparkR.init()
@@ -666,7 +666,7 @@ setMethod("minimum",
 # rdd <- parallelize(sc, 1:10)
 # sumRDD(rdd) # 55
 #}
-# @rdname sumRDD 
+# @rdname sumRDD
 # @aliases sumRDD,RDD
 setMethod("sumRDD",
           signature(x = "RDD"),
@@ -1090,11 +1090,11 @@ setMethod("sortBy",
 # Return:
 #   A list of the first N elements from the RDD in the specified order.
 #
-takeOrderedElem <- function(x, num, ascending = TRUE) {          
+takeOrderedElem <- function(x, num, ascending = TRUE) {
   if (num <= 0L) {
     return(list())
   }
-  
+
   partitionFunc <- function(part) {
     if (num < length(part)) {
       # R limitation: order works only on primitive types!
@@ -1152,7 +1152,7 @@ takeOrderedElem <- function(x, num, ascending = TRUE) {
 # @aliases takeOrdered,RDD,RDD-method
 setMethod("takeOrdered",
           signature(x = "RDD", num = "integer"),
-          function(x, num) {          
+          function(x, num) {
             takeOrderedElem(x, num)
           })
 
@@ -1173,7 +1173,7 @@ setMethod("takeOrdered",
 # @aliases top,RDD,RDD-method
 setMethod("top",
           signature(x = "RDD", num = "integer"),
-          function(x, num) {          
+          function(x, num) {
             takeOrderedElem(x, num, FALSE)
           })
 
@@ -1181,7 +1181,7 @@ setMethod("top",
 #
 # Aggregate the elements of each partition, and then the results for all the
 # partitions, using a given associative function and a neutral "zero value".
-# 
+#
 # @param x An RDD.
 # @param zeroValue A neutral "zero value".
 # @param op An associative function for the folding operation.
@@ -1207,7 +1207,7 @@ setMethod("fold",
 #
 # Aggregate the elements of each partition, and then the results for all the
 # partitions, using given combine functions and a neutral "zero value".
-# 
+#
 # @param x An RDD.
 # @param zeroValue A neutral "zero value".
 # @param seqOp A function to aggregate the RDD elements. It may return a different
@@ -1230,11 +1230,11 @@ setMethod("fold",
 # @aliases aggregateRDD,RDD,RDD-method
 setMethod("aggregateRDD",
           signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"),
-          function(x, zeroValue, seqOp, combOp) {        
+          function(x, zeroValue, seqOp, combOp) {
             partitionFunc <- function(part) {
               Reduce(seqOp, part, zeroValue)
             }
-            
+
             partitionList <- collect(lapplyPartition(x, partitionFunc),
                                      flatten = FALSE)
             Reduce(combOp, partitionList, zeroValue)
@@ -1330,7 +1330,7 @@ setMethod("setName",
 #\dontrun{
 # sc <- sparkR.init()
 # rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
-# collect(zipWithUniqueId(rdd)) 
+# collect(zipWithUniqueId(rdd))
 # # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2))
 #}
 # @rdname zipWithUniqueId
@@ -1426,7 +1426,7 @@ setMethod("glom",
             partitionFunc <- function(part) {
               list(part)
             }
-            
+
             lapplyPartition(x, partitionFunc)
           })
 
@@ -1498,16 +1498,16 @@ setMethod("zipRDD",
             # The jrdd's elements are of scala Tuple2 type. The serialized
             # flag here is used for the elements inside the tuples.
             rdd <- RDD(jrdd, getSerializedMode(rdds[[1]]))
-            
+
             mergePartitions(rdd, TRUE)
           })
 
 # Cartesian product of this RDD and another one.
 #
-# Return the Cartesian product of this RDD and another one, 
-# that is, the RDD of all pairs of elements (a, b) where a 
+# Return the Cartesian product of this RDD and another one,
+# that is, the RDD of all pairs of elements (a, b) where a
 # is in this and b is in other.
-# 
+#
 # @param x An RDD.
 # @param other An RDD.
 # @return A new RDD which is the Cartesian product of these two RDDs.
@@ -1515,7 +1515,7 @@ setMethod("zipRDD",
 #\dontrun{
 # sc <- sparkR.init()
 # rdd <- parallelize(sc, 1:2)
-# sortByKey(cartesian(rdd, rdd)) 
+# sortByKey(cartesian(rdd, rdd))
 # # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2))
 #}
 # @rdname cartesian
@@ -1528,7 +1528,7 @@ setMethod("cartesian",
             # The jrdd's elements are of scala Tuple2 type. The serialized
             # flag here is used for the elements inside the tuples.
             rdd <- RDD(jrdd, getSerializedMode(rdds[[1]]))
-            
+
             mergePartitions(rdd, FALSE)
           })
 
@@ -1598,11 +1598,11 @@ setMethod("intersection",
 
 # Zips an RDD's partitions with one (or more) RDD(s).
 # Same as zipPartitions in Spark.
-# 
+#
 # @param ... RDDs to be zipped.
 # @param func A function to transform zipped partitions.
-# @return A new RDD by applying a function to the zipped partitions. 
-#         Assumes that all the RDDs have the *same number of partitions*, but 
+# @return A new RDD by applying a function to the zipped partitions.
+#         Assumes that all the RDDs have the *same number of partitions*, but
 #         does *not* require them to have the same number of elements in each partition.
 # @examples
 #\dontrun{
@@ -1610,7 +1610,7 @@ setMethod("intersection",
 # rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
 # rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
 # rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
-# collect(zipPartitions(rdd1, rdd2, rdd3, 
+# collect(zipPartitions(rdd1, rdd2, rdd3,
 #                       func = function(x, y, z) { list(list(x, y, z))} ))
 # # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
 #}
@@ -1627,7 +1627,7 @@ setMethod("zipPartitions",
             if (length(unique(nPart)) != 1) {
               stop("Can only zipPartitions RDDs which have the same number of partitions.")
             }
-            
+
             rrdds <- lapply(rrdds, function(rdd) {
               mapPartitionsWithIndex(rdd, function(partIndex, part) {
                 print(length(part))

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 22a4b5b..9a743a3 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -182,7 +182,7 @@ setMethod("toDF", signature(x = "RDD"),
 
 #' Create a DataFrame from a JSON file.
 #'
-#' Loads a JSON file (one object per line), returning the result as a DataFrame 
+#' Loads a JSON file (one object per line), returning the result as a DataFrame
 #' It goes through the entire dataset once to determine the schema.
 #'
 #' @param sqlContext SQLContext to use
@@ -238,7 +238,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
 
 
 #' Create a DataFrame from a Parquet file.
-#' 
+#'
 #' Loads a Parquet file, returning the result as a DataFrame.
 #'
 #' @param sqlContext SQLContext to use
@@ -278,7 +278,7 @@ sql <- function(sqlContext, sqlQuery) {
 }
 
 #' Create a DataFrame from a SparkSQL Table
-#' 
+#'
 #' Returns the specified Table as a DataFrame.  The Table must have already been registered
 #' in the SQLContext.
 #'
@@ -298,7 +298,7 @@ sql <- function(sqlContext, sqlQuery) {
 
 table <- function(sqlContext, tableName) {
   sdf <- callJMethod(sqlContext, "table", tableName)
-  dataFrame(sdf) 
+  dataFrame(sdf)
 }
 
 
@@ -352,7 +352,7 @@ tableNames <- function(sqlContext, databaseName = NULL) {
 
 
 #' Cache Table
-#' 
+#'
 #' Caches the specified table in-memory.
 #'
 #' @param sqlContext SQLContext to use
@@ -370,11 +370,11 @@ tableNames <- function(sqlContext, databaseName = NULL) {
 #' }
 
 cacheTable <- function(sqlContext, tableName) {
-  callJMethod(sqlContext, "cacheTable", tableName)  
+  callJMethod(sqlContext, "cacheTable", tableName)
 }
 
 #' Uncache Table
-#' 
+#'
 #' Removes the specified table from the in-memory cache.
 #'
 #' @param sqlContext SQLContext to use

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/broadcast.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R
index 23dc387..2403925 100644
--- a/R/pkg/R/broadcast.R
+++ b/R/pkg/R/broadcast.R
@@ -27,9 +27,9 @@
 # @description Broadcast variables can be created using the broadcast
 #              function from a \code{SparkContext}.
 # @rdname broadcast-class
-# @seealso broadcast 
+# @seealso broadcast
 #
-# @param id Id of the backing Spark broadcast variable 
+# @param id Id of the backing Spark broadcast variable
 # @export
 setClass("Broadcast", slots = list(id = "character"))
 
@@ -68,7 +68,7 @@ setMethod("value",
 # variable on workers. Not intended for use outside the package.
 #
 # @rdname broadcast-internal
-# @seealso broadcast, value 
+# @seealso broadcast, value
 
 # @param bcastId The id of broadcast variable to set
 # @param value The value to be set

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/deserialize.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 257b435..d961bbc 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -18,7 +18,7 @@
 # Utility functions to deserialize objects from Java.
 
 # Type mapping from Java to R
-# 
+#
 # void -> NULL
 # Int -> integer
 # String -> character

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 12e0917..79055b7 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -130,7 +130,7 @@ setGeneric("maximum", function(x) { standardGeneric("maximum") })
 # @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })
 
-# @rdname sumRDD 
+# @rdname sumRDD
 # @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
 
@@ -219,7 +219,7 @@ setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })
 
 # @rdname zipRDD
 # @export
-setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") }, 
+setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
            signature = "...")
 
 # @rdname zipWithIndex
@@ -364,7 +364,7 @@ setGeneric("subtract",
 
 # @rdname subtractByKey
 # @export
-setGeneric("subtractByKey", 
+setGeneric("subtractByKey",
            function(x, other, numPartitions = 1) {
              standardGeneric("subtractByKey")
            })
@@ -399,15 +399,15 @@ setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
 #' @rdname nafunctions
 #' @export
 setGeneric("dropna",
-           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { 
-             standardGeneric("dropna") 
+           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+             standardGeneric("dropna")
            })
 
 #' @rdname nafunctions
 #' @export
 setGeneric("na.omit",
-           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { 
-             standardGeneric("na.omit") 
+           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+             standardGeneric("na.omit")
            })
 
 #' @rdname schema
@@ -656,4 +656,3 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
 #' @rdname column
 #' @export
 setGeneric("upper", function(x) { standardGeneric("upper") })
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/group.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index b758481..8f1c68f 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -136,4 +136,3 @@ createMethods <- function() {
 }
 
 createMethods()
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/jobj.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R
index a8a2523..0838a7b 100644
--- a/R/pkg/R/jobj.R
+++ b/R/pkg/R/jobj.R
@@ -16,7 +16,7 @@
 #
 
 # References to objects that exist on the JVM backend
-# are maintained using the jobj. 
+# are maintained using the jobj.
 
 #' @include generics.R
 NULL

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index 1e24286..7f902ba 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -784,7 +784,7 @@ setMethod("sortByKey",
             newRDD <- partitionBy(x, numPartitions, rangePartitionFunc)
             lapplyPartition(newRDD, partitionFunc)
           })
-          
+
 # Subtract a pair RDD with another pair RDD.
 #
 # Return an RDD with the pairs from x whose keys are not in other.
@@ -820,7 +820,7 @@ setMethod("subtractByKey",
           })
 
 # Return a subset of this RDD sampled by key.
-# 
+#
 # @description
 # \code{sampleByKey} Create a sample of this RDD using variable sampling rates
 # for different keys as specified by fractions, a key to sampling rate map.

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/schema.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index e442119..15e2bdb 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -20,7 +20,7 @@
 
 #' structType
 #'
-#' Create a structType object that contains the metadata for a DataFrame. Intended for 
+#' Create a structType object that contains the metadata for a DataFrame. Intended for
 #' use with createDataFrame and toDF.
 #'
 #' @param x a structField object (created with the field() function)

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/serialize.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 3169d79..78535ef 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -175,7 +175,7 @@ writeGenericList <- function(con, list) {
     writeObject(con, elem)
   }
 }
-  
+
 # Used to pass in hash maps required on Java side.
 writeEnv <- function(con, env) {
   len <- length(env)

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/sparkR.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 2efd4f0..dbde0c4 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -43,7 +43,7 @@ sparkR.stop <- function() {
       callJMethod(sc, "stop")
       rm(".sparkRjsc", envir = env)
     }
-  
+
     if (exists(".backendLaunched", envir = env)) {
       callJStatic("SparkRHandler", "stopBackend")
     }
@@ -174,7 +174,7 @@ sparkR.init <- function(
   for (varname in names(sparkEnvir)) {
     sparkEnvirMap[[varname]] <- sparkEnvir[[varname]]
   }
-  
+
   sparkExecutorEnvMap <- new.env()
   if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) {
     sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
@@ -214,7 +214,7 @@ sparkR.init <- function(
 
 #' Initialize a new SQLContext.
 #'
-#' This function creates a SparkContext from an existing JavaSparkContext and 
+#' This function creates a SparkContext from an existing JavaSparkContext and
 #' then uses it to initialize a new SQLContext
 #'
 #' @param jsc The existing JavaSparkContext created with SparkR.init()

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 69b2700..13cec0f 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -368,21 +368,21 @@ listToSeq <- function(l) {
 }
 
 # Utility function to recursively traverse the Abstract Syntax Tree (AST) of a
-# user defined function (UDF), and to examine variables in the UDF to decide 
+# user defined function (UDF), and to examine variables in the UDF to decide
 # if their values should be included in the new function environment.
 # param
 #   node The current AST node in the traversal.
 #   oldEnv The original function environment.
 #   defVars An Accumulator of variables names defined in the function's calling environment,
 #           including function argument and local variable names.
-#   checkedFunc An environment of function objects examined during cleanClosure. It can 
+#   checkedFunc An environment of function objects examined during cleanClosure. It can
 #               be considered as a "name"-to-"list of functions" mapping.
 #   newEnv A new function environment to store necessary function dependencies, an output argument.
 processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
   nodeLen <- length(node)
-  
+
   if (nodeLen > 1 && typeof(node) == "language") {
-    # Recursive case: current AST node is an internal node, check for its children. 
+    # Recursive case: current AST node is an internal node, check for its children.
     if (length(node[[1]]) > 1) {
       for (i in 1:nodeLen) {
         processClosure(node[[i]], oldEnv, defVars, checkedFuncs, newEnv)
@@ -393,7 +393,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
         for (i in 2:nodeLen) {
           processClosure(node[[i]], oldEnv, defVars, checkedFuncs, newEnv)
         }
-      } else if (nodeChar == "<-" || nodeChar == "=" || 
+      } else if (nodeChar == "<-" || nodeChar == "=" ||
                    nodeChar == "<<-") { # Assignment Ops.
         defVar <- node[[2]]
         if (length(defVar) == 1 && typeof(defVar) == "symbol") {
@@ -422,21 +422,21 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
         }
       }
     }
-  } else if (nodeLen == 1 && 
+  } else if (nodeLen == 1 &&
                (typeof(node) == "symbol" || typeof(node) == "language")) {
     # Base case: current AST node is a leaf node and a symbol or a function call.
     nodeChar <- as.character(node)
     if (!nodeChar %in% defVars$data) {  # Not a function parameter or local variable.
       func.env <- oldEnv
       topEnv <- parent.env(.GlobalEnv)
-      # Search in function environment, and function's enclosing environments 
+      # Search in function environment, and function's enclosing environments
       # up to global environment. There is no need to look into package environments
-      # above the global or namespace environment that is not SparkR below the global, 
+      # above the global or namespace environment that is not SparkR below the global,
       # as they are assumed to be loaded on workers.
       while (!identical(func.env, topEnv)) {
         # Namespaces other than "SparkR" will not be searched.
-        if (!isNamespace(func.env) || 
-              (getNamespaceName(func.env) == "SparkR" && 
+        if (!isNamespace(func.env) ||
+              (getNamespaceName(func.env) == "SparkR" &&
               !(nodeChar %in% getNamespaceExports("SparkR")))) {  # Only include SparkR internals.
           # Set parameter 'inherits' to FALSE since we do not need to search in
           # attached package environments.
@@ -444,7 +444,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
                        error = function(e) { FALSE })) {
             obj <- get(nodeChar, envir = func.env, inherits = FALSE)
             if (is.function(obj)) {  # If the node is a function call.
-              funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F, 
+              funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F,
                                ifnotfound = list(list(NULL)))[[1]]
               found <- sapply(funcList, function(func) {
                 ifelse(identical(func, obj), TRUE, FALSE)
@@ -453,7 +453,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
                 break
               }
               # Function has not been examined, record it and recursively clean its closure.
-              assign(nodeChar, 
+              assign(nodeChar,
                      if (is.null(funcList[[1]])) {
                        list(obj)
                      } else {
@@ -466,7 +466,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
             break
           }
         }
-        
+
         # Continue to search in enclosure.
         func.env <- parent.env(func.env)
       }
@@ -474,8 +474,8 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
   }
 }
 
-# Utility function to get user defined function (UDF) dependencies (closure). 
-# More specifically, this function captures the values of free variables defined 
+# Utility function to get user defined function (UDF) dependencies (closure).
+# More specifically, this function captures the values of free variables defined
 # outside a UDF, and stores them in the function's environment.
 # param
 #   func A function whose closure needs to be captured.
@@ -488,7 +488,7 @@ cleanClosure <- function(func, checkedFuncs = new.env()) {
     newEnv <- new.env(parent = .GlobalEnv)
     func.body <- body(func)
     oldEnv <- environment(func)
-    # defVars is an Accumulator of variables names defined in the function's calling 
+    # defVars is an Accumulator of variables names defined in the function's calling
     # environment. First, function's arguments are added to defVars.
     defVars <- initAccumulator()
     argNames <- names(as.list(args(func)))
@@ -509,15 +509,15 @@ cleanClosure <- function(func, checkedFuncs = new.env()) {
 # return value
 #   A list of two result RDDs.
 appendPartitionLengths <- function(x, other) {
-  if (getSerializedMode(x) != getSerializedMode(other) || 
+  if (getSerializedMode(x) != getSerializedMode(other) ||
       getSerializedMode(x) == "byte") {
     # Append the number of elements in each partition to that partition so that we can later
     # know the boundary of elements from x and other.
     #
-    # Note that this appending also serves the purpose of reserialization, because even if 
+    # Note that this appending also serves the purpose of reserialization, because even if
     # any RDD is serialized, we need to reserialize it to make sure its partitions are encoded
     # as a single byte array. For example, partitions of an RDD generated from partitionBy()
-    # may be encoded as multiple byte arrays.          
+    # may be encoded as multiple byte arrays.
     appendLength <- function(part) {
       len <- length(part)
       part[[len + 1]] <- len + 1
@@ -544,23 +544,23 @@ mergePartitions <- function(rdd, zip) {
         lengthOfValues <- part[[len]]
         lengthOfKeys <- part[[len - lengthOfValues]]
         stopifnot(len == lengthOfKeys + lengthOfValues)
-        
+
         # For zip operation, check if corresponding partitions of both RDDs have the same number of elements.
         if (zip && lengthOfKeys != lengthOfValues) {
           stop("Can only zip RDDs with same number of elements in each pair of corresponding partitions.")
         }
-        
+
         if (lengthOfKeys > 1) {
           keys <- part[1 : (lengthOfKeys - 1)]
         } else {
           keys <- list()
         }
         if (lengthOfValues > 1) {
-          values <- part[(lengthOfKeys + 1) : (len - 1)]                    
+          values <- part[(lengthOfKeys + 1) : (len - 1)]
         } else {
           values <- list()
         }
-        
+
         if (!zip) {
           return(mergeCompactLists(keys, values))
         }
@@ -578,6 +578,6 @@ mergePartitions <- function(rdd, zip) {
       part
     }
   }
-  
+
   PipelinedRDD(rdd, partitionFunc)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/zzz.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/zzz.R b/R/pkg/R/zzz.R
index 80d796d..301fead 100644
--- a/R/pkg/R/zzz.R
+++ b/R/pkg/R/zzz.R
@@ -18,4 +18,3 @@
 .onLoad <- function(libname, pkgname) {
   sparkR.onLoad(libname, pkgname)
 }
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_binaryFile.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index ca4218f..4db7266 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -59,15 +59,15 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
   wordCount <- lapply(words, function(word) { list(word, 1L) })
 
   counts <- reduceByKey(wordCount, "+", 2L)
-  
+
   saveAsObjectFile(counts, fileName2)
   counts <- objectFile(sc, fileName2)
-    
+
   output <- collect(counts)
   expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1),
                     list("is", 2))
   expect_equal(sortKeyValueList(output), sortKeyValueList(expected))
-  
+
   unlink(fileName1)
   unlink(fileName2, recursive = TRUE)
 })
@@ -87,4 +87,3 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
   unlink(fileName1, recursive = TRUE)
   unlink(fileName2, recursive = TRUE)
 })
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_binary_function.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests/test_binary_function.R
index 6785a7b..a1e354e 100644
--- a/R/pkg/inst/tests/test_binary_function.R
+++ b/R/pkg/inst/tests/test_binary_function.R
@@ -30,7 +30,7 @@ mockFile <- c("Spark is pretty.", "Spark is awesome.")
 test_that("union on two RDDs", {
   actual <- collect(unionRDD(rdd, rdd))
   expect_equal(actual, as.list(rep(nums, 2)))
-  
+
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName)
 
@@ -52,14 +52,14 @@ test_that("union on two RDDs", {
 test_that("cogroup on two RDDs", {
   rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
   rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) 
+  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
   actual <- collect(cogroup.rdd)
-  expect_equal(actual, 
+  expect_equal(actual,
                list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list()))))
-  
+
   rdd1 <- parallelize(sc, list(list("a", 1), list("a", 4)))
   rdd2 <- parallelize(sc, list(list("b", 2), list("a", 3)))
-  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) 
+  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
   actual <- collect(cogroup.rdd)
 
   expected <- list(list("b", list(list(), list(2))), list("a", list(list(1, 4), list(3))))
@@ -71,31 +71,31 @@ test_that("zipPartitions() on RDDs", {
   rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
   rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
   rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
-  actual <- collect(zipPartitions(rdd1, rdd2, rdd3, 
+  actual <- collect(zipPartitions(rdd1, rdd2, rdd3,
                                   func = function(x, y, z) { list(list(x, y, z))} ))
   expect_equal(actual,
                list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))))
-  
+
   mockFile = c("Spark is pretty.", "Spark is awesome.")
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName)
-  
+
   rdd <- textFile(sc, fileName, 1)
-  actual <- collect(zipPartitions(rdd, rdd, 
+  actual <- collect(zipPartitions(rdd, rdd,
                                   func = function(x, y) { list(paste(x, y, sep = "\n")) }))
   expected <- list(paste(mockFile, mockFile, sep = "\n"))
   expect_equal(actual, expected)
-  
+
   rdd1 <- parallelize(sc, 0:1, 1)
-  actual <- collect(zipPartitions(rdd1, rdd, 
+  actual <- collect(zipPartitions(rdd1, rdd,
                                   func = function(x, y) { list(x + nchar(y)) }))
   expected <- list(0:1 + nchar(mockFile))
   expect_equal(actual, expected)
-  
+
   rdd <- map(rdd, function(x) { x })
-  actual <- collect(zipPartitions(rdd, rdd1, 
+  actual <- collect(zipPartitions(rdd, rdd1,
                                   func = function(x, y) { list(y + nchar(x)) }))
   expect_equal(actual, expected)
-  
+
   unlink(fileName)
 })

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_rdd.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_rdd.R b/R/pkg/inst/tests/test_rdd.R
index 0320735..4fe6538 100644
--- a/R/pkg/inst/tests/test_rdd.R
+++ b/R/pkg/inst/tests/test_rdd.R
@@ -477,7 +477,7 @@ test_that("cartesian() on RDDs", {
                  list(1, 1), list(1, 2), list(1, 3),
                  list(2, 1), list(2, 2), list(2, 3),
                  list(3, 1), list(3, 2), list(3, 3)))
-  
+
   # test case where one RDD is empty
   emptyRdd <- parallelize(sc, list())
   actual <- collect(cartesian(rdd, emptyRdd))
@@ -486,7 +486,7 @@ test_that("cartesian() on RDDs", {
   mockFile = c("Spark is pretty.", "Spark is awesome.")
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName)
-  
+
   rdd <- textFile(sc, fileName)
   actual <- collect(cartesian(rdd, rdd))
   expected <- list(
@@ -495,7 +495,7 @@ test_that("cartesian() on RDDs", {
     list("Spark is pretty.", "Spark is pretty."),
     list("Spark is pretty.", "Spark is awesome."))
   expect_equal(sortKeyValueList(actual), expected)
-  
+
   rdd1 <- parallelize(sc, 0:1)
   actual <- collect(cartesian(rdd1, rdd))
   expect_equal(sortKeyValueList(actual),
@@ -504,11 +504,11 @@ test_that("cartesian() on RDDs", {
                  list(0, "Spark is awesome."),
                  list(1, "Spark is pretty."),
                  list(1, "Spark is awesome.")))
-  
+
   rdd1 <- map(rdd, function(x) { x })
   actual <- collect(cartesian(rdd, rdd1))
   expect_equal(sortKeyValueList(actual), expected)
-  
+
   unlink(fileName)
 })
 
@@ -760,7 +760,7 @@ test_that("collectAsMap() on a pairwise RDD", {
 })
 
 test_that("show()", {
-  rdd <- parallelize(sc, list(1:10))    
+  rdd <- parallelize(sc, list(1:10))
   expect_output(show(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+")
 })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_shuffle.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_shuffle.R b/R/pkg/inst/tests/test_shuffle.R
index d7dedda..adf0b91 100644
--- a/R/pkg/inst/tests/test_shuffle.R
+++ b/R/pkg/inst/tests/test_shuffle.R
@@ -106,39 +106,39 @@ test_that("aggregateByKey", {
   zeroValue <- list(0, 0)
   seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
   combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
-  aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)   
-  
+  aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
+
   actual <- collect(aggregatedRDD)
-  
+
   expected <- list(list(1, list(3, 2)), list(2, list(7, 2)))
   expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
 
   # test aggregateByKey for string keys
   rdd <- parallelize(sc, list(list("a", 1), list("a", 2), list("b", 3), list("b", 4)))
-  
+
   zeroValue <- list(0, 0)
   seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
   combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
-  aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)   
+  aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
 
   actual <- collect(aggregatedRDD)
-  
+
   expected <- list(list("a", list(3, 2)), list("b", list(7, 2)))
   expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
 })
 
-test_that("foldByKey", {  
+test_that("foldByKey", {
   # test foldByKey for int keys
   folded <- foldByKey(intRdd, 0, "+", 2L)
-  
+
   actual <- collect(folded)
-  
+
   expected <- list(list(2L, 101), list(1L, 199))
   expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
 
   # test foldByKey for double keys
   folded <- foldByKey(doubleRdd, 0, "+", 2L)
-  
+
   actual <- collect(folded)
 
   expected <- list(list(1.5, 199), list(2.5, 101))
@@ -146,15 +146,15 @@ test_that("foldByKey", {
 
   # test foldByKey for string keys
   stringKeyPairs <- list(list("a", -1), list("b", 100), list("b", 1), list("a", 200))
-  
+
   stringKeyRDD <- parallelize(sc, stringKeyPairs)
   folded <- foldByKey(stringKeyRDD, 0, "+", 2L)
-  
+
   actual <- collect(folded)
-  
+
   expected <- list(list("b", 101), list("a", 199))
   expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
-  
+
   # test foldByKey for empty pair RDD
   rdd <- parallelize(sc, list())
   folded <- foldByKey(rdd, 0, "+", 2L)

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 8946348..fc7f3f0 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -67,7 +67,7 @@ test_that("structType and structField", {
   expect_true(inherits(testField, "structField"))
   expect_true(testField$name() == "a")
   expect_true(testField$nullable())
-  
+
   testSchema <- structType(testField, structField("b", "integer"))
   expect_true(inherits(testSchema, "structType"))
   expect_true(inherits(testSchema$fields()[[2]], "structField"))
@@ -598,7 +598,7 @@ test_that("column functions", {
   c3 <- lower(c) + upper(c) + first(c) + last(c)
   c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string")
   c5 <- n(c) + n_distinct(c)
-  c5 <- acos(c) + asin(c) + atan(c) + cbrt(c) 
+  c5 <- acos(c) + asin(c) + atan(c) + cbrt(c)
   c6 <- ceiling(c) + cos(c) + cosh(c) + exp(c) + expm1(c)
   c7 <- floor(c) + log(c) + log10(c) + log1p(c) + rint(c)
   c8 <- sign(c) + sin(c) + sinh(c) + tan(c) + tanh(c)
@@ -829,7 +829,7 @@ test_that("dropna() on a DataFrame", {
   rows <- collect(df)
 
   # drop with columns
-  
+
   expected <- rows[!is.na(rows$name),]
   actual <- collect(dropna(df, cols = "name"))
   expect_true(identical(expected, actual))
@@ -842,7 +842,7 @@ test_that("dropna() on a DataFrame", {
   expect_true(identical(expected$age, actual$age))
   expect_true(identical(expected$height, actual$height))
   expect_true(identical(expected$name, actual$name))
-  
+
   expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
   actual <- collect(dropna(df, cols = c("age", "height")))
   expect_true(identical(expected, actual))
@@ -850,7 +850,7 @@ test_that("dropna() on a DataFrame", {
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df))
   expect_true(identical(expected, actual))
-  
+
   # drop with how
 
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
@@ -860,7 +860,7 @@ test_that("dropna() on a DataFrame", {
   expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
   actual <- collect(dropna(df, "all"))
   expect_true(identical(expected, actual))
-  
+
   expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
   actual <- collect(dropna(df, "any"))
   expect_true(identical(expected, actual))
@@ -872,14 +872,14 @@ test_that("dropna() on a DataFrame", {
   expected <- rows[!is.na(rows$age) | !is.na(rows$height),]
   actual <- collect(dropna(df, "all", cols = c("age", "height")))
   expect_true(identical(expected, actual))
-  
+
   # drop with threshold
-  
+
   expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,]
   actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height")))
-  expect_true(identical(expected, actual))  
+  expect_true(identical(expected, actual))
 
-  expected <- rows[as.integer(!is.na(rows$age)) + 
+  expected <- rows[as.integer(!is.na(rows$age)) +
                    as.integer(!is.na(rows$height)) +
                    as.integer(!is.na(rows$name)) >= 3,]
   actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height")))
@@ -889,9 +889,9 @@ test_that("dropna() on a DataFrame", {
 test_that("fillna() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPathNa)
   rows <- collect(df)
-  
+
   # fill with value
-  
+
   expected <- rows
   expected$age[is.na(expected$age)] <- 50
   expected$height[is.na(expected$height)] <- 50.6
@@ -912,7 +912,7 @@ test_that("fillna() on a DataFrame", {
   expected$name[is.na(expected$name)] <- "unknown"
   actual <- collect(fillna(df, "unknown", c("age", "name")))
   expect_true(identical(expected, actual))
-  
+
   # fill with named list
 
   expected <- rows
@@ -920,7 +920,7 @@ test_that("fillna() on a DataFrame", {
   expected$height[is.na(expected$height)] <- 50.6
   expected$name[is.na(expected$name)] <- "unknown"
   actual <- collect(fillna(df, list("age" = 50, "height" = 50.6, "name" = "unknown")))
-  expect_true(identical(expected, actual))  
+  expect_true(identical(expected, actual))
 })
 
 unlink(parquetPath)

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_take.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_take.R b/R/pkg/inst/tests/test_take.R
index 7f4c7c3..c5eb417 100644
--- a/R/pkg/inst/tests/test_take.R
+++ b/R/pkg/inst/tests/test_take.R
@@ -64,4 +64,3 @@ test_that("take() gives back the original elements in correct count and order",
   expect_true(length(take(numListRDD, 0)) == 0)
   expect_true(length(take(numVectorRDD, 0)) == 0)
 })
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_textFile.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/test_textFile.R
index 6b87b4b..092ad9d 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/test_textFile.R
@@ -58,7 +58,7 @@ test_that("textFile() word count works as expected", {
   expected <- list(list("pretty.", 1), list("is", 2), list("awesome.", 1),
                    list("Spark", 2))
   expect_equal(sortKeyValueList(output), sortKeyValueList(expected))
-  
+
   unlink(fileName)
 })
 
@@ -115,13 +115,13 @@ test_that("textFile() and saveAsTextFile() word count works as expected", {
 
   saveAsTextFile(counts, fileName2)
   rdd <- textFile(sc, fileName2)
-   
+
   output <- collect(rdd)
   expected <- list(list("awesome.", 1), list("Spark", 2),
                    list("pretty.", 1), list("is", 2))
   expectedStr <- lapply(expected, function(x) { toString(x) })
   expect_equal(sortKeyValueList(output), sortKeyValueList(expectedStr))
-  
+
   unlink(fileName1)
   unlink(fileName2)
 })
@@ -159,4 +159,3 @@ test_that("Pipelined operations on RDDs created using textFile", {
 
   unlink(fileName)
 })
-

http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/inst/tests/test_utils.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_utils.R b/R/pkg/inst/tests/test_utils.R
index 539e3a3..15030e6 100644
--- a/R/pkg/inst/tests/test_utils.R
+++ b/R/pkg/inst/tests/test_utils.R
@@ -43,13 +43,13 @@ test_that("serializeToBytes on RDD", {
   mockFile <- c("Spark is pretty.", "Spark is awesome.")
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName)
-  
+
   text.rdd <- textFile(sc, fileName)
   expect_true(getSerializedMode(text.rdd) == "string")
   ser.rdd <- serializeToBytes(text.rdd)
   expect_equal(collect(ser.rdd), as.list(mockFile))
   expect_true(getSerializedMode(ser.rdd) == "byte")
-  
+
   unlink(fileName)
 })
 
@@ -64,7 +64,7 @@ test_that("cleanClosure on R functions", {
   expect_equal(actual, y)
   actual <- get("g", envir = env, inherits = FALSE)
   expect_equal(actual, g)
-  
+
   # Test for nested enclosures and package variables.
   env2 <- new.env()
   funcEnv <- new.env(parent = env2)
@@ -106,7 +106,7 @@ test_that("cleanClosure on R functions", {
   expect_equal(length(ls(env)), 1)
   actual <- get("y", envir = env, inherits = FALSE)
   expect_equal(actual, y)
-  
+
   # Test for function (and variable) definitions.
   f <- function(x) {
     g <- function(y) { y * 2 }
@@ -115,7 +115,7 @@ test_that("cleanClosure on R functions", {
   newF <- cleanClosure(f)
   env <- environment(newF)
   expect_equal(length(ls(env)), 0)  # "y" and "g" should not be included.
-  
+
   # Test for overriding variables in base namespace (Issue: SparkR-196).
   nums <- as.list(1:10)
   rdd <- parallelize(sc, nums, 2L)
@@ -128,7 +128,7 @@ test_that("cleanClosure on R functions", {
   actual <- collect(lapply(rdd, f))
   expected <- as.list(c(rep(FALSE, 4), rep(TRUE, 6)))
   expect_equal(actual, expected)
-  
+
   # Test for broadcast variables.
   a <- matrix(nrow=10, ncol=10, data=rnorm(100))
   aBroadcast <- broadcast(sc, a)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message