flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From trohrm...@apache.org
Subject flink git commit: [FLINK-1737] [ml] Add outer product to Vector class
Date Fri, 08 Jan 2016 17:05:05 GMT
Repository: flink
Updated Branches:
  refs/heads/master 08c81378b -> 009146c7e


[FLINK-1737] [ml] Add outer product to Vector class

Outer/Kronecker product take two vectors v, w and compute a matrix
such that each matrix entry (i,j) is the product of v(i) * w(j).
Depending on the vector types the result is either a DenseMatrix
or a SparseMatrix. If one of the operands is sparse, then the result
is sparse as well.

Implementation of outer product for sparse vectors.

Test cases for outer product computation. For dense as well as sparse vectors, More tests
are to come.

Replaced implementation of  `outer' method in order to avoid call to `SparseVector.apply`
(which involves binary search).

Reduced warning by three: Removed unnecessary `val` keyword from case class fields.

Incorporated suggestion from Till Rohrmann's code review ("avoid binary search by using zipWithIndex").

This closes #1078.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/009146c7
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/009146c7
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/009146c7

Branch: refs/heads/master
Commit: 009146c7e658522766566c8361e9b88ac3b864ea
Parents: 08c8137
Author: daniel-pape <dgpape@web.de>
Authored: Tue Aug 18 20:29:06 2015 +0200
Committer: Till Rohrmann <trohrmann@apache.org>
Committed: Fri Jan 8 18:04:11 2016 +0100

----------------------------------------------------------------------
 .../org/apache/flink/ml/math/DenseVector.scala  |  38 ++++++-
 .../org/apache/flink/ml/math/SparseVector.scala |  43 +++++++-
 .../scala/org/apache/flink/ml/math/Vector.scala |   8 ++
 .../apache/flink/ml/math/DenseVectorSuite.scala | 105 ++++++++++++++++++-
 .../flink/ml/math/SparseVectorSuite.scala       |  88 ++++++++++++++--
 5 files changed, 267 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/009146c7/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
index f242496..5e70741 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
@@ -27,7 +27,7 @@ import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector
=> BreezeD
  * @param data Array of doubles to store the vector elements
  */
 case class DenseVector(
-    val data: Array[Double])
+    data: Array[Double])
   extends Vector
   with Serializable {
 
@@ -76,8 +76,8 @@ case class DenseVector(
 
   /** Updates the element at the given index with the provided value
     *
-    * @param index
-    * @param value
+    * @param index Index whose value is updated.
+    * @param value The value used to update the index.
     */
   override def update(index: Int, value: Double): Unit = {
     require(0 <= index && index < data.length, index + " not in [0, " + data.length
+ ")")
@@ -102,6 +102,38 @@ case class DenseVector(
     }
   }
 
+  /** Returns the outer product (a.k.a. Kronecker product) of `this`
+    * with `other`. The result will given in [[org.apache.flink.ml.math.SparseMatrix]]
+    * representation if `other` is sparse and as [[org.apache.flink.ml.math.DenseMatrix]]
otherwise.
+    *
+    * @param other a Vector
+    * @return the [[org.apache.flink.ml.math.Matrix]] which equals the outer product of `this`
+    *         with `other.`
+    */
+  override def outer(other: Vector): Matrix = {
+    val numRows = size
+    val numCols = other.size
+
+    other match {
+      case sv: SparseVector =>
+        val entries = for {
+          i <- 0 until numRows
+          (j, k) <- sv.indices.zipWithIndex
+          value = this(i) * sv.data(k)
+          if value != 0
+        } yield (i, j, value)
+
+        SparseMatrix.fromCOO(numRows, numCols, entries)
+      case _ =>
+        val values = for {
+          i <- 0 until numRows
+          j <- 0 until numCols
+        } yield this(i) * other(j)
+
+        DenseMatrix(numRows, numCols, values.toArray)
+    }
+  }
+
   /** Magnitude of a vector
     *
     * @return

http://git-wip-us.apache.org/repos/asf/flink/blob/009146c7/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
index 5a78beb..fec018f 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
@@ -25,13 +25,17 @@ import scala.util.Sorting
 /** Sparse vector implementation storing the data in two arrays. One index contains the sorted
   * indices of the non-zero vector entries and the other the corresponding vector entries
   */
-case class SparseVector(size: Int, indices: Array[Int], data: Array[Double])
+case class SparseVector(
+    size: Int,
+    indices: Array[Int],
+    data: Array[Double])
   extends Vector
   with Serializable {
+
   /** Updates the element at the given index with the provided value
     *
-    * @param index
-    * @param value
+    * @param index Index whose value is updated.
+    * @param value The value used to update the index.
     */
   override def update(index: Int, value: Double): Unit = {
     val resolvedIndex = locate(index)
@@ -82,6 +86,39 @@ case class SparseVector(size: Int, indices: Array[Int], data: Array[Double])
     }
   }
 
+  /** Returns the outer product (a.k.a. Kronecker product) of `this`
+    * with `other`. The result is given in [[org.apache.flink.ml.math.SparseMatrix]]
+    * representation.
+    *
+    * @param other a Vector
+    * @return the [[org.apache.flink.ml.math.SparseMatrix]] which equals the outer product
of `this`
+    *         with `other.`
+    */
+  override def outer(other: Vector): SparseMatrix = {
+    val numRows = size
+    val numCols = other.size
+
+    val entries = other match {
+      case sv: SparseVector =>
+       for {
+          (i, k) <- indices.zipWithIndex
+          (j, l) <- sv.indices.zipWithIndex
+          value = data(k) * sv.data(l)
+          if value != 0
+        } yield (i, j, value)
+      case _ =>
+        for {
+          (i, k) <- indices.zipWithIndex
+          j <- 0 until numCols
+          value = data(k) * other(j)
+          if value != 0
+        } yield (i, j, value)
+    }
+
+    SparseMatrix.fromCOO(numRows, numCols, entries)
+  }
+
+
   /** Magnitude of a vector
     *
     * @return

http://git-wip-us.apache.org/repos/asf/flink/blob/009146c7/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
index c3a9a39..e52328d 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
@@ -58,6 +58,14 @@ trait Vector extends Serializable {
     */
   def dot(other: Vector): Double
 
+  /** Returns the outer product of the recipient and the argument
+    *
+    *
+    * @param other a Vector
+    * @return a matrix
+    */
+  def outer(other: Vector): Matrix
+
   /** Magnitude of a vector
     *
     * @return

http://git-wip-us.apache.org/repos/asf/flink/blob/009146c7/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala
b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala
index c7a3dc0..aa1c4f9 100644
--- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/DenseVectorSuite.scala
@@ -25,13 +25,13 @@ class DenseVectorSuite extends FlatSpec with Matchers {
   behavior of "Flink's DenseVector"
 
   it should "contain the initialization data" in {
-    val data = Array.range(1,10)
+    val data = Array.range(1, 10)
 
     val vector = DenseVector(data)
 
     assertResult(data.length)(vector.size)
 
-    data.zip(vector.map(_._2)).foreach{case (expected, actual) => assertResult(expected)(actual)}
+    data.zip(vector.map(_._2)).foreach { case (expected, actual) => assertResult(expected)(actual)
}
   }
 
   it should "fail in case of an illegal element access" in {
@@ -47,7 +47,7 @@ class DenseVectorSuite extends FlatSpec with Matchers {
       vector(size)
     }
   }
-  
+
   it should "calculate dot product with DenseVector" in {
     val vec1 = DenseVector(Array(1, 0, 1))
     val vec2 = DenseVector(Array(0, 1, 0))
@@ -78,6 +78,105 @@ class DenseVectorSuite extends FlatSpec with Matchers {
     }
   }
 
+  it should "calculate outer product with DenseVector correctly as DenseMatrix" in {
+    val vec1 = DenseVector(Array(1, 0, 1))
+    val vec2 = DenseVector(Array(0, 1, 0))
+
+    vec1.outer(vec2) should be(an[DenseMatrix])
+    vec1.outer(vec2) should be(DenseMatrix(3, 3, Array(0, 1, 0, 0, 0, 0, 0, 1, 0)))
+  }
+
+  it should "calculate outer product with SparseVector correctly as SparseMatrix" in {
+    val vec1 = DenseVector(Array(1, 0, 1))
+    val vec2 = SparseVector(3, Array(1), Array(1))
+
+    vec1.outer(vec2) should be(an[SparseMatrix])
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1)))
+  }
+
+  it should "calculate outer product with a DenseVector correctly as DenseMatrix 2" in {
+    val vec1 = DenseVector(Array(1, 0, 1, 0, 0))
+    val vec2 = DenseVector(Array(0, 0, 1, 0, 1))
+
+    val values = Array(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0)
+    vec1.outer(vec2) should be(DenseMatrix(5, 5, values))
+  }
+
+  it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in
{
+    val vec1 = DenseVector(Array(1, 0, 1, 0, 0))
+    val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1))
+
+    val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries))
+  }
+
+
+
+  it should s"""calculate right outer product with DenseVector
+               |with one-dimensional unit DenseVector as identity""".stripMargin in {
+    val vec = DenseVector(Array(1, 0, 1, 0, 0))
+    val unit = DenseVector(1)
+
+    vec.outer(unit) should equal(DenseMatrix(vec.size, 1, vec.data))
+  }
+
+  it should s"""calculate right outer product with DenseVector
+               |with one-dimensional unit SparseVector as identity""".stripMargin in {
+    val vec = DenseVector(Array(1, 0, 1, 0, 0))
+    val unit = SparseVector(1, Array(0), Array(1))
+
+    vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1)))
+  }
+
+  it should s"""calculate left outer product for DenseVector
+               |with one-dimensional unit DenseVector as identity""".stripMargin in {
+    val vec = DenseVector(Array(1, 2, 3, 4, 5))
+    val unit = DenseVector(1)
+
+    unit.outer(vec) should equal(DenseMatrix(1, vec.size, vec.data))
+  }
+
+  it should s"""calculate left outer product for SparseVector
+               |with one-dimensional unit DenseVector as identity""".stripMargin in {
+    val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5))
+    val unit = DenseVector(1)
+
+    val entries = Iterable((0, 0, 1.0), (0, 1, 2.0), (0, 2, 3.0), (0, 3, 4.0), (0, 4, 5.0))
+    unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, entries))
+  }
+
+  it should s"""calculate outer product with DenseVector
+               |via multiplication if both vectors are one-dimensional""".stripMargin in
{
+    val vec1 = DenseVector(Array(2))
+    val vec2 = DenseVector(Array(3))
+
+    vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3))
+  }
+
+  it should s"""calculate outer product with SparseVector
+               |via multiplication if both vectors are one-dimensional""".stripMargin in
{
+    val vec1 = DenseVector(Array(2))
+    val vec2 = SparseVector(1, Array(0), Array(3))
+
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3)))
+  }
+
+  it should "calculate outer product with DenseVector via multiplication if both vectors
" +
+    "are one-dimensional" in {
+    val vec1 = DenseVector(Array(2))
+    val vec2 = DenseVector(Array(3))
+
+    vec1.outer(vec2) should be(DenseMatrix(1, 1, 2 * 3))
+  }
+
+  it should "calculate outer product with SparseVector via multiplication if both vectors
are " +
+    "one-dimensioan" in {
+    val vec1 = DenseVector(Array(2))
+    val vec2 = SparseVector(1, Array(0), Array(3))
+
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3)))
+  }
+
   it should "calculate magnitude of vector" in {
     val vec = DenseVector(Array(1, 4, 8))
 

http://git-wip-us.apache.org/repos/asf/flink/blob/009146c7/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala
b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala
index 5e7e4d7..15eed20 100644
--- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/SparseVectorSuite.scala
@@ -29,7 +29,7 @@ class SparseVectorSuite extends FlatSpec with Matchers {
 
     sparseVector(0) should equal(1)
 
-    for(index <- 1 until 3) {
+    for (index <- 1 until 3) {
       sparseVector(index) should equal(0)
     }
   }
@@ -53,13 +53,15 @@ class SparseVectorSuite extends FlatSpec with Matchers {
     denseVector should equal(expectedDenseVector)
 
     val dataMap = data.
-      groupBy{_._1}.
-      mapValues{
+      groupBy {
+      _._1
+    }.
+      mapValues {
       entries =>
         entries.map(_._2).sum
     }
 
-    for(index <- 0 until size) {
+    for (index <- 0 until size) {
       sparseVector(index) should be(dataMap.getOrElse(index, 0))
     }
   }
@@ -82,7 +84,7 @@ class SparseVectorSuite extends FlatSpec with Matchers {
     }
 
     intercept[IllegalArgumentException] {
-      val sparseVector = SparseVector.fromCOO(5, (0, 1), (4,3), (5, 1))
+      val sparseVector = SparseVector.fromCOO(5, (0, 1), (4, 3), (5, 1))
     }
   }
 
@@ -95,7 +97,7 @@ class SparseVectorSuite extends FlatSpec with Matchers {
 
     copy(3) = 3
 
-    sparseVector should not equal copy
+    sparseVector should not equal (copy)
   }
 
   it should "calculate dot product with SparseVector" in {
@@ -128,6 +130,80 @@ class SparseVectorSuite extends FlatSpec with Matchers {
     }
   }
 
+  it should "calculate outer product with SparseVector correctly as SparseMatrix" in {
+    val vec1 = SparseVector(3, Array(0, 2), Array(1, 1))
+    val vec2 = SparseVector(3, Array(1), Array(1))
+
+    vec1.outer(vec2) should be(an[SparseMatrix])
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1)))
+  }
+
+  it should "calculate outer product with DenseVector correctly as SparseMatrix" in {
+    val vec1 = SparseVector(3, Array(0, 2), Array(1, 1))
+    val vec2 = DenseVector(Array(0, 1, 0))
+
+    vec1.outer(vec2) should be(an[SparseMatrix])
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(3, 3, (0, 1, 1), (2, 1, 1)))
+  }
+
+  it should "calculate outer product with a DenseVector correctly as SparseMatrix 2" in {
+    val vec1 = SparseVector(5, Array(0, 2), Array(1, 1))
+    val vec2 = DenseVector(Array(0, 0, 1, 0, 1))
+
+    val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries))
+  }
+
+  it should "calculate outer product with a SparseVector correctly as SparseMatrix 2" in
{
+    val vec1 = SparseVector(5, Array(0, 2), Array(1, 1))
+    val vec2 = SparseVector.fromCOO(5, (2, 1), (4, 1))
+
+    val entries = Iterable((0, 2, 1.0), (0, 4, 1.0), (2, 2, 1.0), (2, 4, 1.0))
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(5, 5, entries))
+  }
+
+
+  it should s"""calculate right outer product with DenseVector
+               |with one-dimensional unit DenseVector as identity""".stripMargin in {
+    val vec = SparseVector(5, Array(0, 2), Array(1, 1))
+    val unit = DenseVector(1)
+
+    vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1)))
+  }
+
+  it should s"""calculate right outer product with DenseVector
+               |with one-dimensional unit SparseVector as identity""".stripMargin in {
+    val vec = SparseVector(5, Array(0, 2), Array(1, 1))
+    val unit = SparseVector(1, Array(0), Array(1))
+
+    vec.outer(unit) should equal(SparseMatrix.fromCOO(vec.size, 1, (0, 0, 1), (2, 0, 1)))
+  }
+
+  it should s"""calculate left outer product for SparseVector
+               |with one-dimensional unit DenseVector as identity""".stripMargin in {
+    val vec = SparseVector(5, Array(0, 1, 2, 3, 4), Array(1, 2, 3, 4, 5))
+    val unit = DenseVector(1)
+
+    val entries = Iterable((0, 0, 1.0), (0, 1, 2.0), (0, 2, 3.0), (0, 3, 4.0), (0, 4, 5.0))
+    unit.outer(vec) should equal(SparseMatrix.fromCOO(1, vec.size, entries))
+  }
+
+  it should s"""calculate outer product with SparseVector
+               |via multiplication if both vectors are one-dimensional""".stripMargin in
{
+    val vec1 = SparseVector.fromCOO(1, (0, 2))
+    val vec2 = SparseVector.fromCOO(1, (0, 3))
+
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3)))
+  }
+
+  it should s"""calculate outer product with DenseVector
+               |via multiplication if both vectors are one-dimensional""".stripMargin in
{
+    val vec1 = SparseVector(1, Array(0), Array(2))
+    val vec2 = DenseVector(Array(3))
+
+    vec1.outer(vec2) should be(SparseMatrix.fromCOO(1, 1, (0, 0, 2 * 3)))
+  }
+
   it should "calculate magnitude of vector" in {
     val vec = SparseVector.fromCOO(3, (0, 1), (1, 4), (2, 8))
 


Mime
View raw message