Return-Path: X-Original-To: apmail-spark-reviews-archive@minotaur.apache.org Delivered-To: apmail-spark-reviews-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4FF2710ABC for ; Wed, 17 Dec 2014 22:37:59 +0000 (UTC) Received: (qmail 34805 invoked by uid 500); 17 Dec 2014 22:37:59 -0000 Delivered-To: apmail-spark-reviews-archive@spark.apache.org Received: (qmail 34781 invoked by uid 500); 17 Dec 2014 22:37:59 -0000 Mailing-List: contact reviews-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list reviews@spark.apache.org Received: (qmail 34765 invoked by uid 99); 17 Dec 2014 22:37:58 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Dec 2014 22:37:58 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 7C8D5831548; Wed, 17 Dec 2014 22:37:58 +0000 (UTC) From: mengxr To: reviews@spark.apache.org Reply-To: reviews@spark.apache.org References: In-Reply-To: Subject: [GitHub] spark pull request: [SPARK-4409][MLlib] Additional Linear Algebra ... 
Content-Type: text/plain Message-Id: <20141217223758.7C8D5831548@tyr.zones.apache.org> Date: Wed, 17 Dec 2014 22:37:58 +0000 (UTC) Github user mengxr commented on a diff in the pull request: https://github.com/apache/spark/pull/3319#discussion_r22010780 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala --- @@ -197,6 +300,171 @@ class SparseMatrix( } override def copy = new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) + + private[mllib] def map(f: Double => Double) = + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f)) + + private[mllib] def update(f: Double => Double): SparseMatrix = { + val len = values.length + var i = 0 + while (i < len) { + values(i) = f(values(i)) + i += 1 + } + this + } +} + +/** + * Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]]. + */ +object SparseMatrix { + + /** + * Generate an Identity Matrix in `SparseMatrix` format. + * @param n number of rows and columns of the matrix + * @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal + */ + def speye(n: Int): SparseMatrix = { + new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0)) + } + + /** Generates a SparseMatrix given an Array[Double] of size numRows * numCols. The number of + * non-zeros in `raw` is provided for efficiency. 
*/ + private def genRand( + numRows: Int, + numCols: Int, + raw: Array[Double], + nonZero: Int): SparseMatrix = { + val sparseA: ArrayBuffer[Double] = new ArrayBuffer(nonZero) + val sCols: ArrayBuffer[Int] = new ArrayBuffer(numCols + 1) + val sRows: ArrayBuffer[Int] = new ArrayBuffer(nonZero) + + var i = 0 + var nnz = 0 + var lastCol = -1 + raw.foreach { v => + val r = i % numRows + val c = (i - r) / numRows + if (v != 0.0) { + sRows.append(r) + sparseA.append(v) + while (c != lastCol) { + sCols.append(nnz) + lastCol += 1 + } + nnz += 1 + } + i += 1 + } + while (numCols > lastCol) { + sCols.append(sparseA.length) + lastCol += 1 + } + new SparseMatrix(numRows, numCols, sCols.toArray, sRows.toArray, sparseA.toArray) + } + + /** + * Generate a `SparseMatrix` consisting of i.i.d. uniform random numbers. + * @param numRows number of rows of the matrix + * @param numCols number of columns of the matrix + * @param density the desired density for the matrix + * @param rng a random number generator + * @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1) + */ + def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { + require(density >= 0.0 && density <= 1.0, "density must be a double in the range " + + s"0.0 <= d <= 1.0. Currently, density: $density") + val length = numRows * numCols + val rawA = new Array[Double](length) + var nnz = 0 + for (i <- 0 until length) { + val p = rng.nextDouble() + if (p <= density) { + rawA.update(i, rng.nextDouble()) + nnz += 1 + } + } + genRand(numRows, numCols, rawA, nnz) + } + + /** + * Generate a `SparseMatrix` consisting of i.i.d. gaussian random numbers. 
+ * @param numRows number of rows of the matrix + * @param numCols number of columns of the matrix + * @param density the desired density for the matrix + * @param rng a random number generator + * @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1) + */ + def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { + require(density >= 0.0 && density <= 1.0, "density must be a double in the range " + + s"0.0 <= d <= 1.0. Currently, density: $density") + val length = numRows * numCols + val rawA = new Array[Double](length) + var nnz = 0 + for (i <- 0 until length) { + val p = rng.nextDouble() + if (p <= density) { + rawA.update(i, rng.nextGaussian()) + nnz += 1 + } + } + genRand(numRows, numCols, rawA, nnz) + } + + /** + * Generate a diagonal matrix in `SparseMatrix` format from the supplied values. + * @param vector a `Vector` that will form the values on the diagonal of the matrix + * @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero + * `values` on the diagonal + */ + def diag(vector: Vector): SparseMatrix = { + val n = vector.size + vector match { + case sVec: SparseVector => + val rows = sVec.indices --- End diff -- The logic could be simplified by adding a factory method that takes a sparse matrix in the coordinate list (COO) format and converts it to the compressed sparse column (CSC) format. `diag` could then simply call that method. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org