From commits-return-32810-archive-asf-public=cust-asf.ponee.io@spark.apache.org Thu Aug 9 15:07:52 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id A469F18065B for ; Thu, 9 Aug 2018 15:07:51 +0200 (CEST) Received: (qmail 71651 invoked by uid 500); 9 Aug 2018 13:07:50 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 71642 invoked by uid 99); 9 Aug 2018 13:07:50 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 09 Aug 2018 13:07:50 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 7C165DFB18; Thu, 9 Aug 2018 13:07:50 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: srowen@apache.org To: commits@spark.apache.org Message-Id: <00f12dae57f44b3db6bbe380eb25c1a0@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-25047][ML] Can't assign SerializedLambda to scala.Function1 in deserialization of BucketedRandomProjectionLSHModel Date: Thu, 9 Aug 2018 13:07:50 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master b2950cef3 -> 1a7e747ce [SPARK-25047][ML] Can't assign SerializedLambda to scala.Function1 in deserialization of BucketedRandomProjectionLSHModel ## What changes were proposed in this pull request? Convert two function fields in ML classes to simple functions to avoid odd SerializedLambda deserialization problem ## How was this patch tested? Existing tests. Closes #22032 from srowen/SPARK-25047. 
Authored-by: Sean Owen Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a7e747c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a7e747c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a7e747c Branch: refs/heads/master Commit: 1a7e747ce4f8c5253c5923045d23c62e43a6566b Parents: b2950ce Author: Sean Owen Authored: Thu Aug 9 08:07:46 2018 -0500 Committer: Sean Owen Committed: Thu Aug 9 08:07:46 2018 -0500 ---------------------------------------------------------------------- .../feature/BucketedRandomProjectionLSH.scala | 14 ++++++-------- .../scala/org/apache/spark/ml/feature/LSH.scala | 4 ++-- .../apache/spark/ml/feature/MinHashLSH.scala | 20 +++++++++----------- .../GeneralizedLinearRegression.scala | 15 +++++++-------- 4 files changed, 24 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index a906e95..0554455 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -82,14 +82,12 @@ class BucketedRandomProjectionLSHModel private[ml]( override def setOutputCol(value: String): this.type = super.set(outputCol, value) @Since("2.1.0") - override protected[ml] val hashFunction: Vector => Array[Vector] = { - key: Vector => { - val hashValues: Array[Double] = randUnitVectors.map({ - randUnitVector => Math.floor(BLAS.dot(key, randUnitVector) / $(bucketLength)) - }) - // TODO: Output vectors of 
dimension numHashFunctions in SPARK-18450 - hashValues.map(Vectors.dense(_)) - } + override protected[ml] def hashFunction(elems: Vector): Array[Vector] = { + val hashValues = randUnitVectors.map( + randUnitVector => Math.floor(BLAS.dot(elems, randUnitVector) / $(bucketLength)) + ) + // TODO: Output vectors of dimension numHashFunctions in SPARK-18450 + hashValues.map(Vectors.dense(_)) } @Since("2.1.0") http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index a70931f..b208523 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -75,7 +75,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] * The hash function of LSH, mapping an input feature vector to multiple hash vectors. * @return The mapping of LSH function. 
*/ - protected[ml] val hashFunction: Vector => Array[Vector] + protected[ml] def hashFunction(elems: Vector): Array[Vector] /** * Calculate the distance between two different keys using the distance metric corresponding @@ -97,7 +97,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) - val transformUDF = udf(hashFunction, DataTypes.createArrayType(new VectorUDT)) + val transformUDF = udf(hashFunction(_: Vector), DataTypes.createArrayType(new VectorUDT)) dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol)))) } http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index a043033..21cde66 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -60,18 +60,16 @@ class MinHashLSHModel private[ml]( override def setOutputCol(value: String): this.type = super.set(outputCol, value) @Since("2.1.0") - override protected[ml] val hashFunction: Vector => Array[Vector] = { - elems: Vector => { - require(elems.numNonzeros > 0, "Must have at least 1 non zero entry.") - val elemsList = elems.toSparse.indices.toList - val hashValues = randCoefficients.map { case (a, b) => - elemsList.map { elem: Int => - ((1L + elem) * a + b) % MinHashLSH.HASH_PRIME - }.min.toDouble - } - // TODO: Output vectors of dimension numHashFunctions in SPARK-18450 - hashValues.map(Vectors.dense(_)) + override protected[ml] def hashFunction(elems: Vector): Array[Vector] = { + require(elems.numNonzeros > 0, "Must have at least 1 non zero entry.") + val elemsList = elems.toSparse.indices.toList + val 
hashValues = randCoefficients.map { case (a, b) => + elemsList.map { elem: Int => + ((1L + elem) * a + b) % MinHashLSH.HASH_PRIME + }.min.toDouble } + // TODO: Output vectors of dimension numHashFunctions in SPARK-18450 + hashValues.map(Vectors.dense(_)) } @Since("2.1.0") http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 20878b6..abb60ea 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -515,14 +515,13 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine * The reweight function used to update working labels and weights * at each iteration of [[IterativelyReweightedLeastSquares]]. 
*/ - val reweightFunc: (OffsetInstance, WeightedLeastSquaresModel) => (Double, Double) = { - (instance: OffsetInstance, model: WeightedLeastSquaresModel) => { - val eta = model.predict(instance.features) + instance.offset - val mu = fitted(eta) - val newLabel = eta - instance.offset + (instance.label - mu) * link.deriv(mu) - val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * family.variance(mu)) - (newLabel, newWeight) - } + def reweightFunc( + instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, Double) = { + val eta = model.predict(instance.features) + instance.offset + val mu = fitted(eta) + val newLabel = eta - instance.offset + (instance.label - mu) * link.deriv(mu) + val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * family.variance(mu)) + (newLabel, newWeight) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org