From: felixcheung@apache.org
To: commits@spark.apache.org
Subject: spark git commit: [SPARK-17173][SPARKR] R MLlib refactor, cleanup, reformat, fix deprecation in test
Date: Mon, 22 Aug 2016 19:27:41 +0000 (UTC)

Repository: spark
Updated Branches:
  refs/heads/master 342278c09 -> 0583ecda1


[SPARK-17173][SPARKR] R MLlib refactor, cleanup, reformat, fix deprecation in test

## What changes were proposed in this pull request?

Refactor, clean up, and reformat the SparkR MLlib wrappers, and fix a deprecated
testthat expectation: the duplicated `write.ml` and `predict` method bodies are
consolidated into shared `write_internal`/`predict_internal` helpers, redundant
`return()` calls and unused `...` arguments are removed, and
`expect_that(..., not(throws_error()))` is replaced with `expect_error(..., NA)`.

## How was this patch tested?

Unit tests, manual tests.

Author: Felix Cheung

Closes #14735 from felixcheung/rmllibutil.
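At the heart of the patch, the per-model `write.ml` and `predict` method bodies are
consolidated into two shared helpers, and redundant `return()` calls are dropped (an
R function returns its last evaluated expression anyway). A minimal sketch of the
consolidated pattern, restated from the diff below — `callJMethod`, `dataFrame`, and
the `@jobj` slot are SparkR internals that bridge to the JVM-side wrapper objects:

    # Shared helpers introduced by this patch (as they appear in the diff below).

    # Save a model wrapper through its JVM-side MLWriter, optionally overwriting.
    write_internal <- function(object, path, overwrite = FALSE) {
      writer <- callJMethod(object@jobj, "write")
      if (overwrite) {
        writer <- callJMethod(writer, "overwrite")
      }
      invisible(callJMethod(writer, "save", path))
    }

    # Run the JVM-side model's transform() on a SparkDataFrame and wrap the result.
    predict_internal <- function(object, newData) {
      dataFrame(callJMethod(object@jobj, "transform", newData@sdf))
    }

    # Each per-model method then becomes a one-line delegation, e.g.:
    setMethod("predict", signature(object = "KMeansModel"),
              function(object, newData) {
                predict_internal(object, newData)
              })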
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0583ecda
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0583ecda
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0583ecda

Branch: refs/heads/master
Commit: 0583ecda1b63a7e3f126c3276059e4f99548a741
Parents: 342278c
Author: Felix Cheung
Authored: Mon Aug 22 12:27:33 2016 -0700
Committer: Felix Cheung
Committed: Mon Aug 22 12:27:33 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/mllib.R                        | 205 ++++++++++++----------------
 R/pkg/inst/tests/testthat/test_mllib.R |  10 +-
 2 files changed, 98 insertions(+), 117 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0583ecda/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 9a53c80..b36fbce 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -88,9 +88,9 @@ setClass("ALSModel", representation(jobj = "jobj"))
 #' @rdname write.ml
 #' @name write.ml
 #' @export
-#' @seealso \link{spark.glm}, \link{glm}, \link{spark.gaussianMixture}
-#' @seealso \link{spark.als}, \link{spark.kmeans}, \link{spark.lda}, \link{spark.naiveBayes}
-#' @seealso \link{spark.survreg}, \link{spark.isoreg}
+#' @seealso \link{spark.glm}, \link{glm},
+#' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans},
+#' @seealso \link{spark.lda}, \link{spark.naiveBayes}, \link{spark.survreg},
 #' @seealso \link{read.ml}
 NULL

@@ -101,11 +101,22 @@ NULL
 #' @rdname predict
 #' @name predict
 #' @export
-#' @seealso \link{spark.glm}, \link{glm}, \link{spark.gaussianMixture}
-#' @seealso \link{spark.als}, \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg}
-#' @seealso \link{spark.isoreg}
+#' @seealso \link{spark.glm}, \link{glm},
+#' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans},
+#' @seealso \link{spark.naiveBayes}, \link{spark.survreg},
 NULL

+write_internal <- function(object, path, overwrite = FALSE) {
+  writer <- callJMethod(object@jobj, "write")
+  if (overwrite) {
+    writer <- callJMethod(writer, "overwrite")
+  }
+  invisible(callJMethod(writer, "save", path))
+}
+
+predict_internal <- function(object, newData) {
+  dataFrame(callJMethod(object@jobj, "transform", newData@sdf))
+}

 #' Generalized Linear Models
 #'
@@ -173,7 +184,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
                 jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper",
                                     "fit", formula, data@sdf, family$family, family$link,
                                     tol, as.integer(maxIter), as.character(weightCol))
-                return(new("GeneralizedLinearRegressionModel", jobj = jobj))
+                new("GeneralizedLinearRegressionModel", jobj = jobj)
               })

 #' Generalized Linear Models (R-compliant)
@@ -219,7 +230,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
 #' @export
 #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             is.loaded <- callJMethod(jobj, "isLoaded")
             features <- callJMethod(jobj, "rFeatures")
@@ -245,7 +256,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
                         deviance = deviance, df.null = df.null, df.residual = df.residual,
                         aic = aic, iter = iter, family = family, is.loaded = is.loaded)
             class(ans) <- "summary.GeneralizedLinearRegressionModel"
-            return(ans)
+            ans
           })

 # Prints the summary of GeneralizedLinearRegressionModel
@@ -275,8 +286,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
       " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"),
       1L, paste, collapse = " "), sep = "")
   cat("AIC: ", format(x$aic, digits = 4L), "\n\n",
-      "Number of Fisher Scoring iterations: ", x$iter, "\n", sep = "")
-  cat("\n")
+      "Number of Fisher Scoring iterations: ", x$iter, "\n\n", sep = "")
   invisible(x)
 }

@@ -291,7 +301,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
 #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 # Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(),
@@ -305,7 +315,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
 #' @note predict(NaiveBayesModel) since 2.0.0
 setMethod("predict", signature(object = "NaiveBayesModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 # Returns the summary of a naive Bayes model produced by \code{spark.naiveBayes}
@@ -317,7 +327,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
 #' @export
 #' @note summary(NaiveBayesModel) since 2.0.0
 setMethod("summary", signature(object = "NaiveBayesModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             features <- callJMethod(jobj, "features")
             labels <- callJMethod(jobj, "labels")
@@ -328,7 +338,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
             tables <- matrix(tables, nrow = length(labels))
             rownames(tables) <- unlist(labels)
             colnames(tables) <- unlist(features)
-            return(list(apriori = apriori, tables = tables))
+            list(apriori = apriori, tables = tables)
           })

 # Returns posterior probabilities from a Latent Dirichlet Allocation model produced by spark.lda()
@@ -342,7 +352,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' @note spark.posterior(LDAModel) since 2.1.0
 setMethod("spark.posterior", signature(object = "LDAModel", newData = "SparkDataFrame"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 # Returns the summary of a Latent Dirichlet Allocation model produced by \code{spark.lda}
@@ -377,12 +387,11 @@ setMethod("summary", signature(object = "LDAModel"),
             vocabSize <- callJMethod(jobj, "vocabSize")
             topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic))
             vocabulary <- callJMethod(jobj, "vocabulary")
-            return(list(docConcentration = unlist(docConcentration),
-                        topicConcentration = topicConcentration,
-                        logLikelihood = logLikelihood, logPerplexity = logPerplexity,
-                        isDistributed = isDistributed, vocabSize = vocabSize,
-                        topics = topics,
-                        vocabulary = unlist(vocabulary)))
+            list(docConcentration = unlist(docConcentration),
+                 topicConcentration = topicConcentration,
+                 logLikelihood = logLikelihood, logPerplexity = logPerplexity,
+                 isDistributed = isDistributed, vocabSize = vocabSize,
+                 topics = topics, vocabulary = unlist(vocabulary))
           })

 # Returns the log perplexity of a Latent Dirichlet Allocation model produced by \code{spark.lda}
@@ -395,8 +404,8 @@ setMethod("summary", signature(object = "LDAModel"),
 #' @note spark.perplexity(LDAModel) since 2.1.0
 setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFrame"),
           function(object, data) {
-            return(ifelse(missing(data), callJMethod(object@jobj, "logPerplexity"),
-                          callJMethod(object@jobj, "computeLogPerplexity", data@sdf)))
+            ifelse(missing(data), callJMethod(object@jobj, "logPerplexity"),
+                   callJMethod(object@jobj, "computeLogPerplexity", data@sdf))
           })

 # Saves the Latent Dirichlet Allocation model to the input path.
@@ -412,11 +421,7 @@ setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFr
 #' @note write.ml(LDAModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "LDAModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 #' Isotonic Regression Model
@@ -471,9 +476,9 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula"
             }
             jobj <- callJStatic("org.apache.spark.ml.r.IsotonicRegressionWrapper", "fit",
-                    data@sdf, formula, as.logical(isotonic), as.integer(featureIndex),
-                    as.character(weightCol))
-            return(new("IsotonicRegressionModel", jobj = jobj))
+                                data@sdf, formula, as.logical(isotonic), as.integer(featureIndex),
+                                as.character(weightCol))
+            new("IsotonicRegressionModel", jobj = jobj)
           })

 # Predicted values based on an isotonicRegression model
@@ -487,7 +492,7 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula"
 #' @note predict(IsotonicRegressionModel) since 2.1.0
 setMethod("predict", signature(object = "IsotonicRegressionModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 # Get the summary of an IsotonicRegressionModel model
@@ -499,11 +504,11 @@ setMethod("predict", signature(object = "IsotonicRegressionModel"),
 #' @export
 #' @note summary(IsotonicRegressionModel) since 2.1.0
 setMethod("summary", signature(object = "IsotonicRegressionModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             boundaries <- callJMethod(jobj, "boundaries")
             predictions <- callJMethod(jobj, "predictions")
-            return(list(boundaries = boundaries, predictions = predictions))
+            list(boundaries = boundaries, predictions = predictions)
           })

 #' K-Means Clustering Model
@@ -553,7 +558,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
             initMode <- match.arg(initMode)
             jobj <- callJStatic("org.apache.spark.ml.r.KMeansWrapper", "fit", data@sdf, formula,
                                 as.integer(k), as.integer(maxIter), initMode)
-            return(new("KMeansModel", jobj = jobj))
+            new("KMeansModel", jobj = jobj)
           })

 #' Get fitted result from a k-means model
@@ -576,14 +581,14 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
 #'}
 #' @note fitted since 2.0.0
 setMethod("fitted", signature(object = "KMeansModel"),
-          function(object, method = c("centers", "classes"), ...) {
+          function(object, method = c("centers", "classes")) {
             method <- match.arg(method)
             jobj <- object@jobj
             is.loaded <- callJMethod(jobj, "isLoaded")
             if (is.loaded) {
-              stop(paste("Saved-loaded k-means model does not support 'fitted' method"))
+              stop("Saved-loaded k-means model does not support 'fitted' method")
             } else {
-              return(dataFrame(callJMethod(jobj, "fitted", method)))
+              dataFrame(callJMethod(jobj, "fitted", method))
             }
           })

@@ -595,7 +600,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
 #' @export
 #' @note summary(KMeansModel) since 2.0.0
 setMethod("summary", signature(object = "KMeansModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             is.loaded <- callJMethod(jobj, "isLoaded")
             features <- callJMethod(jobj, "features")
@@ -610,8 +615,8 @@ setMethod("summary", signature(object = "KMeansModel"),
             } else {
               dataFrame(callJMethod(jobj, "cluster"))
             }
-            return(list(coefficients = coefficients, size = size,
-                        cluster = cluster, is.loaded = is.loaded))
+            list(coefficients = coefficients, size = size,
+                 cluster = cluster, is.loaded = is.loaded)
           })

 # Predicted values based on a k-means model
@@ -623,7 +628,7 @@ setMethod("summary", signature(object = "KMeansModel"),
 #' @note predict(KMeansModel) since 2.0.0
 setMethod("predict", signature(object = "KMeansModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 #' Naive Bayes Models
@@ -665,11 +670,11 @@ setMethod("predict", signature(object = "KMeansModel"),
 #' }
 #' @note spark.naiveBayes since 2.0.0
 setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "formula"),
-          function(data, formula, smoothing = 1.0, ...) {
+          function(data, formula, smoothing = 1.0) {
             formula <- paste(deparse(formula), collapse = "")
             jobj <- callJStatic("org.apache.spark.ml.r.NaiveBayesWrapper", "fit",
                                 formula, data@sdf, smoothing)
-            return(new("NaiveBayesModel", jobj = jobj))
+            new("NaiveBayesModel", jobj = jobj)
           })

 # Saves the Bernoulli naive Bayes model to the input path.
@@ -684,11 +689,7 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form
 #' @note write.ml(NaiveBayesModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 # Saves the AFT survival regression model to the input path.
@@ -702,11 +703,7 @@ setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
 #' @seealso \link{read.ml}
 setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 # Saves the generalized linear model to the input path.
@@ -720,11 +717,7 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
 #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 # Save fitted MLlib model to the input path
@@ -738,11 +731,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 #' @note write.ml(KMeansModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 # Save fitted IsotonicRegressionModel to the input path
@@ -757,11 +746,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
 #' @note write.ml(IsotonicRegression, character) since 2.1.0
 setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 # Save fitted MLlib model to the input path
@@ -776,11 +761,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char
 #' @note write.ml(GaussianMixtureModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "character"),
           function(object, path, overwrite = FALSE) {
-            writer <- callJMethod(object@jobj, "write")
-            if (overwrite) {
-              writer <- callJMethod(writer, "overwrite")
-            }
-            invisible(callJMethod(writer, "save", path))
+            write_internal(object, path, overwrite)
           })

 #' Load a fitted MLlib model from the input path.
@@ -801,21 +782,21 @@ read.ml <- function(path) {
   path <- suppressWarnings(normalizePath(path))
   jobj <- callJStatic("org.apache.spark.ml.r.RWrappers", "load", path)
   if (isInstanceOf(jobj, "org.apache.spark.ml.r.NaiveBayesWrapper")) {
-    return(new("NaiveBayesModel", jobj = jobj))
+    new("NaiveBayesModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.AFTSurvivalRegressionWrapper")) {
-    return(new("AFTSurvivalRegressionModel", jobj = jobj))
+    new("AFTSurvivalRegressionModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper")) {
-    return(new("GeneralizedLinearRegressionModel", jobj = jobj))
+    new("GeneralizedLinearRegressionModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.KMeansWrapper")) {
-    return(new("KMeansModel", jobj = jobj))
+    new("KMeansModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.LDAWrapper")) {
-    return(new("LDAModel", jobj = jobj))
+    new("LDAModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.IsotonicRegressionWrapper")) {
-    return(new("IsotonicRegressionModel", jobj = jobj))
+    new("IsotonicRegressionModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GaussianMixtureWrapper")) {
-    return(new("GaussianMixtureModel", jobj = jobj))
+    new("GaussianMixtureModel", jobj = jobj)
   } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.ALSWrapper")) {
-    return(new("ALSModel", jobj = jobj))
+    new("ALSModel", jobj = jobj)
   } else {
     stop(paste("Unsupported model: ", jobj))
   }
@@ -860,7 +841,7 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula
             formula <- paste(deparse(formula), collapse = "")
             jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper",
                                 "fit", formula, data@sdf)
-            return(new("AFTSurvivalRegressionModel", jobj = jobj))
+            new("AFTSurvivalRegressionModel", jobj = jobj)
           })

 #' Latent Dirichlet Allocation
@@ -926,7 +907,7 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
                          as.numeric(subsamplingRate), topicConcentration,
                          as.array(docConcentration), as.array(customizedStopWords),
                          maxVocabSize)
-            return(new("LDAModel", jobj = jobj))
+            new("LDAModel", jobj = jobj)
           })

 # Returns a summary of the AFT survival regression model produced by spark.survreg,
@@ -946,7 +927,7 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
             coefficients <- as.matrix(unlist(coefficients))
             colnames(coefficients) <- c("Value")
             rownames(coefficients) <- unlist(features)
-            return(list(coefficients = coefficients))
+            list(coefficients = coefficients)
           })

 # Makes predictions from an AFT survival regression model or a model produced by
@@ -960,7 +941,7 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
 #' @note predict(AFTSurvivalRegressionModel) since 2.0.0
 setMethod("predict", signature(object = "AFTSurvivalRegressionModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 #' Multivariate Gaussian Mixture Model (GMM)
@@ -1014,7 +995,7 @@ setMethod("spark.gaussianMixture", signature(data = "SparkDataFrame", formula =
             formula <- paste(deparse(formula), collapse = "")
             jobj <- callJStatic("org.apache.spark.ml.r.GaussianMixtureWrapper", "fit", data@sdf,
                                 formula, as.integer(k), as.integer(maxIter), as.numeric(tol))
-            return(new("GaussianMixtureModel", jobj = jobj))
+            new("GaussianMixtureModel", jobj = jobj)
           })

 # Get the summary of a multivariate gaussian mixture model
@@ -1027,7 +1008,7 @@ setMethod("spark.gaussianMixture", signature(data = "SparkDataFrame", formula =
 #' @export
 #' @note summary(GaussianMixtureModel) since 2.1.0
 setMethod("summary", signature(object = "GaussianMixtureModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             is.loaded <- callJMethod(jobj, "isLoaded")
             lambda <- unlist(callJMethod(jobj, "lambda"))
@@ -1052,8 +1033,8 @@ setMethod("summary", signature(object = "GaussianMixtureModel"),
             } else {
               dataFrame(callJMethod(jobj, "posterior"))
             }
-            return(list(lambda = lambda, mu = mu, sigma = sigma,
-                        posterior = posterior, is.loaded = is.loaded))
+            list(lambda = lambda, mu = mu, sigma = sigma,
+                 posterior = posterior, is.loaded = is.loaded)
           })

 # Predicted values based on a gaussian mixture model
@@ -1067,7 +1048,7 @@ setMethod("summary", signature(object = "GaussianMixtureModel"),
 #' @note predict(GaussianMixtureModel) since 2.1.0
 setMethod("predict", signature(object = "GaussianMixtureModel"),
           function(object, newData) {
-            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
+            predict_internal(object, newData)
           })

 #' Alternating Least Squares (ALS) for Collaborative Filtering
@@ -1149,7 +1130,7 @@ setMethod("spark.als", signature(data = "SparkDataFrame"),
                                 reg, as.integer(maxIter), implicitPrefs, alpha, nonnegative,
                                 as.integer(numUserBlocks), as.integer(numItemBlocks),
                                 as.integer(checkpointInterval), as.integer(seed))
-            return(new("ALSModel", jobj = jobj))
+            new("ALSModel", jobj = jobj)
           })

 # Returns a summary of the ALS model produced by spark.als.
@@ -1163,17 +1144,17 @@ setMethod("spark.als", signature(data = "SparkDataFrame"),
 #' @export
 #' @note summary(ALSModel) since 2.1.0
 setMethod("summary", signature(object = "ALSModel"),
-function(object, ...) {
-  jobj <- object@jobj
-  user <- callJMethod(jobj, "userCol")
-  item <- callJMethod(jobj, "itemCol")
-  rating <- callJMethod(jobj, "ratingCol")
-  userFactors <- dataFrame(callJMethod(jobj, "userFactors"))
-  itemFactors <- dataFrame(callJMethod(jobj, "itemFactors"))
-  rank <- callJMethod(jobj, "rank")
-  return(list(user = user, item = item, rating = rating, userFactors = userFactors,
-              itemFactors = itemFactors, rank = rank))
-})
+          function(object) {
+            jobj <- object@jobj
+            user <- callJMethod(jobj, "userCol")
+            item <- callJMethod(jobj, "itemCol")
+            rating <- callJMethod(jobj, "ratingCol")
+            userFactors <- dataFrame(callJMethod(jobj, "userFactors"))
+            itemFactors <- dataFrame(callJMethod(jobj, "itemFactors"))
+            rank <- callJMethod(jobj, "rank")
+            list(user = user, item = item, rating = rating, userFactors = userFactors,
+                 itemFactors = itemFactors, rank = rank)
+          })

 # Makes predictions from an ALS model or a model produced by spark.als.
@@ -1185,9 +1166,9 @@ function(object, ...) {
 #' @export
 #' @note predict(ALSModel) since 2.1.0
 setMethod("predict", signature(object = "ALSModel"),
-function(object, newData) {
-  return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
-})
+          function(object, newData) {
+            predict_internal(object, newData)
+          })

 # Saves the ALS model to the input path.
@@ -1203,10 +1184,6 @@ function(object, newData) {
 #' @seealso \link{read.ml}
 #' @note write.ml(ALSModel, character) since 2.1.0
 setMethod("write.ml", signature(object = "ALSModel", path = "character"),
-function(object, path, overwrite = FALSE) {
-  writer <- callJMethod(object@jobj, "write")
-  if (overwrite) {
-    writer <- callJMethod(writer, "overwrite")
-  }
-  invisible(callJMethod(writer, "save", path))
-})
+          function(object, path, overwrite = FALSE) {
+            write_internal(object, path, overwrite)
+          })


http://git-wip-us.apache.org/repos/asf/spark/blob/0583ecda/R/pkg/inst/tests/testthat/test_mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index d15c239..de9bd48 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -95,6 +95,10 @@ test_that("spark.glm summary", {
   expect_equal(stats$df.residual, rStats$df.residual)
   expect_equal(stats$aic, rStats$aic)

+  out <- capture.output(print(stats))
+  expect_match(out[2], "Deviance Residuals:")
+  expect_true(any(grepl("AIC: 59.22", out)))
+
   # binomial family
   df <- suppressWarnings(createDataFrame(iris))
   training <- df[df$Species %in% c("versicolor", "virginica"), ]
@@ -409,7 +413,7 @@ test_that("spark.naiveBayes", {

   # Test e1071::naiveBayes
   if (requireNamespace("e1071", quietly = TRUE)) {
-    expect_that(m <- e1071::naiveBayes(Survived ~ ., data = t1), not(throws_error()))
+    expect_error(m <- e1071::naiveBayes(Survived ~ ., data = t1), NA)
     expect_equal(as.character(predict(m, t1[1, ])), "Yes")
   }
 })
@@ -487,7 +491,7 @@ test_that("spark.isotonicRegression", {
                        weightCol = "weight")
   # only allow one variable on the right hand side of the formula
   expect_error(model2 <- spark.isoreg(df, ~., isotonic = FALSE))
-  result <- summary(model, df)
+  result <- summary(model)
   expect_equal(result$predictions, list(7, 5, 4, 4, 1))

   # Test model prediction
@@ -503,7 +507,7 @@ test_that("spark.isotonicRegression", {
   expect_error(write.ml(model, modelPath))
   write.ml(model, modelPath, overwrite = TRUE)
   model2 <- read.ml(modelPath)
-  expect_equal(result, summary(model2, df))
+  expect_equal(result, summary(model2))

   unlink(modelPath)
 })
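For reference, a hypothetical SparkR session exercising the save/load round-trip
touched by this patch; it assumes an active `sparkR.session()`, and the dataset and
path are illustrative only:

    df <- createDataFrame(longley)        # 'longley' from the base datasets package
    model <- spark.glm(df, Employed ~ GNP, family = "gaussian")
    summary(model)                        # compact GLM summary

    modelPath <- tempfile(pattern = "spark-glm", fileext = ".tmp")
    write.ml(model, modelPath, overwrite = TRUE)   # delegates to write_internal()
    model2 <- read.ml(modelPath)                   # dispatches on the saved wrapper class
    head(predict(model2, df))                      # delegates to predict_internal()
    unlink(modelPath)

Note that with `...` removed from the `summary` signatures, a call like
`summary(model, df)` (as the isotonic regression tests previously made) now raises
an error instead of silently ignoring the extra argument.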