flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From trohrm...@apache.org
Subject flink git commit: [FLINK-1717] [ml] Adds support to directly read libSVM and SVMLight files
Date Wed, 01 Apr 2015 15:17:13 GMT
Repository: flink
Updated Branches:
  refs/heads/master cbc8423e2 -> 015af177d


[FLINK-1717] [ml] Adds support to directly read libSVM and SVMLight files

This closes #543.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/015af177
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/015af177
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/015af177

Branch: refs/heads/master
Commit: 015af177d1b54ea7bdf860a3a48e5a8f022d8304
Parents: cbc8423
Author: Till Rohrmann <trohrmann@apache.org>
Authored: Sat Mar 28 18:31:02 2015 +0100
Committer: Till Rohrmann <trohrmann@apache.org>
Committed: Wed Apr 1 16:42:20 2015 +0200

----------------------------------------------------------------------
 flink-staging/flink-ml/pom.xml                  |   8 +
 .../scala/org/apache/flink/ml/MLUtils.scala     | 122 +++++++++++++
 .../apache/flink/ml/common/LabeledVector.scala  |  17 +-
 .../flink/ml/feature/PolynomialBase.scala       |   2 +-
 .../org/apache/flink/ml/math/DenseMatrix.scala  |   9 +-
 .../org/apache/flink/ml/math/DenseVector.scala  |   2 +-
 .../org/apache/flink/ml/math/SparseMatrix.scala |   3 +-
 .../org/apache/flink/ml/math/SparseVector.scala |   8 +-
 .../scala/org/apache/flink/ml/package.scala     |  46 +++++
 .../regression/MultipleLinearRegression.scala   |   2 +-
 .../org/apache/flink/ml/MLUtilsSuite.scala      | 112 ++++++++++++
 .../ml/feature/PolynomialBaseITSuite.scala      |  12 +-
 .../flink/ml/regression/RegressionData.scala    | 180 +++++++++----------
 13 files changed, 417 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/pom.xml
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/pom.xml b/flink-staging/flink-ml/pom.xml
index 899d266..91005f8 100644
--- a/flink-staging/flink-ml/pom.xml
+++ b/flink-staging/flink-ml/pom.xml
@@ -63,6 +63,14 @@
 
 		<dependency>
 			<groupId>org.apache.flink</groupId>
+			<artifactId>flink-core</artifactId>
+			<version>${project.version}</version>
+			<type>test-jar</type>
+			<scope>test</scope>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.flink</groupId>
 			<artifactId>flink-test-utils</artifactId>
 			<version>${project.version}</version>
 			<scope>test</scope>

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala
new file mode 100644
index 0000000..a327ddc
--- /dev/null
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml
+
+import org.apache.flink.api.common.functions.RichMapFunction
+import org.apache.flink.api.java.operators.DataSink
+import org.apache.flink.api.scala._
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.ml.common.LabeledVector
+import org.apache.flink.ml.math.SparseVector
+
+/** Convenience functions for machine learning tasks
+  *
+  * This object contains convenience functions for machine learning tasks:
+  *
+  * - readLibSVM:
+  *   Reads a libSVM/SVMLight input file and returns a data set of [[LabeledVector]].
+  *   The file format is specified [http://svmlight.joachims.org/ here].
+  *
+  * - writeLibSVM:
+  *   Writes a data set of [[LabeledVector]] in libSVM/SVMLight format to disk. The file format
+  *   is specified [http://svmlight.joachims.org/ here].
+  */
+object MLUtils {
+
+  val DIMENSION = "dimension"
+
+  /** Reads a file in libSVM/SVMLight format and converts the data into a data set of
+    * [[LabeledVector]]. The dimension of the [[LabeledVector]] is determined automatically.
+    *
+    * Since the libSVM/SVMLight format stores a vector in its sparse form, the [[LabeledVector]]
+    * will also be instantiated with a [[SparseVector]].
+    *
+    * @param env [[ExecutionEnvironment]]
+    * @param filePath Path to the input file
+    * @return [[DataSet]] of [[LabeledVector]] containing the information of the libSVM/SVMLight
+    *        file
+    */
+  def readLibSVM(env: ExecutionEnvironment, filePath: String): DataSet[LabeledVector] = {
+    val labelCOODS = env.readTextFile(filePath).flatMap {
+      line =>
+        // remove all comments which start with a '#'
+        val commentFreeLine = line.takeWhile(_ != '#').trim
+
+        if(commentFreeLine.nonEmpty) {
+          val splits = commentFreeLine.split(' ')
+          val label = splits.head.toDouble
+          val sparseFeatures = splits.tail
+          val coos = sparseFeatures.map {
+            str =>
+              val pair = str.split(':')
+              require(pair.length == 2, "Each feature entry has to have the form <feature>:<value>")
+
+              // libSVM index is 1-based, but we expect it to be 0-based
+              val index = pair(0).toInt - 1
+              val value = pair(1).toDouble
+
+              (index, value)
+          }
+
+          Some((label, coos))
+        } else {
+          None
+        }
+    }
+
+    // Calculate maximum dimension of vectors
+    val dimensionDS = labelCOODS.map {
+      labelCOO =>
+        labelCOO._2.map( _._1 + 1 ).max
+    }.reduce(scala.math.max(_, _))
+
+    labelCOODS.map{ new RichMapFunction[(Double, Array[(Int, Double)]), LabeledVector] {
+      var dimension = 0
+
+      override def open(configuration: Configuration): Unit = {
+        dimension = getRuntimeContext.getBroadcastVariable(DIMENSION).get(0)
+      }
+
+      override def map(value: (Double, Array[(Int, Double)])): LabeledVector = {
+        new LabeledVector(value._1, SparseVector.fromCOO(dimension, value._2))
+      }
+    }}.withBroadcastSet(dimensionDS, DIMENSION)
+  }
+
+  /** Writes a [[DataSet]] of [[LabeledVector]] to a file using the libSVM/SVMLight format.
+    * 
+    * @param filePath Path to output file
+    * @param labeledVectors [[DataSet]] of [[LabeledVector]] to write to disk
+    * @return
+    */
+  def writeLibSVM(filePath: String, labeledVectors: DataSet[LabeledVector]): DataSink[String] = {
+    val stringRepresentation = labeledVectors.map{
+      labeledVector =>
+        val vectorStr = labeledVector.vector.
+          // remove zero entries
+          filter( _._2 != 0).
+          map{case (idx, value) => (idx + 1) + ":" + value}.
+          mkString(" ")
+
+        labeledVector.label + " " + vectorStr
+    }
+
+    stringRepresentation.writeAsText(filePath)
+  }
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala
index f3d6172..4563724 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala
@@ -23,7 +23,20 @@ import org.apache.flink.ml.math.Vector
 /** This class represents a vector with an associated label as it is required for many supervised
   * learning tasks.
   *
-  * @param vector Data point
   * @param label Label of the data point
+  * @param vector Data point
   */
-case class LabeledVector(vector: Vector, label: Double) {}
+case class LabeledVector(label: Double, vector: Vector) {
+
+  override def equals(obj: Any): Boolean = {
+    obj match {
+      case labeledVector: LabeledVector =>
+        vector.equals(labeledVector.vector) && label.equals(labeledVector.label)
+      case _ => false
+    }
+  }
+
+  override def toString: String = {
+    s"LabeledVector($label, $vector)"
+  }
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala
index 04f698e..61f477c 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala
@@ -75,7 +75,7 @@ class PolynomialBase extends Transformer[LabeledVector, LabeledVector] with Seri
 
         val transformedVector = calculatePolynomial(degree, vector)
 
-        LabeledVector(transformedVector, label)
+        LabeledVector(label, transformedVector)
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala
index 16291b8..fd490e1 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala
@@ -26,9 +26,12 @@ package org.apache.flink.ml.math
  * @param numCols Number of columns
  * @param data Array of matrix elements in column major order
  */
-case class DenseMatrix(val numRows: Int,
-                  val numCols: Int,
-                  val data: Array[Double]) extends Matrix {
+case class DenseMatrix(
+    val numRows: Int,
+    val numCols: Int,
+    val data: Array[Double])
+  extends Matrix
+  with Serializable{
 
   import DenseMatrix._
 

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
index 50992a9..ab657c5 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
@@ -24,7 +24,7 @@ package org.apache.flink.ml.math
  *
  * @param data Array of doubles to store the vector elements
  */
-case class DenseVector(val data: Array[Double]) extends Vector {
+case class DenseVector(val data: Array[Double]) extends Vector with Serializable {
 
   /**
    * Number of elements in a vector

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala
index b065630..c9842f8 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala
@@ -37,7 +37,8 @@ class SparseMatrix(
     val rowIndices: Array[Int],
     val colPtrs: Array[Int],
     val data: Array[Double])
-  extends Matrix {
+  extends Matrix
+  with Serializable {
 
   /** Element wise access function
     *

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
index 9fa69cb..2c63203 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
@@ -27,7 +27,8 @@ class SparseVector(
     val size: Int,
     val indices: Array[Int],
     val data: Array[Double])
-  extends Vector {
+  extends Vector
+  with Serializable {
   /** Updates the element at the given index with the provided value
     *
     * @param index
@@ -77,6 +78,11 @@ class SparseVector(
     denseVector
   }
 
+  override def toString: String = {
+    val entries = indices.zip(data).mkString(", ")
+    "SparseVector(" + entries + ")"
+  }
+
   private def locate(index: Int): Int = {
     require(0 <= index && index < size, index + " not in [0, " + size + ")")
 

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala
new file mode 100644
index 0000000..250c8cb
--- /dev/null
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink
+
+import org.apache.flink.api.java.operators.DataSink
+import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment}
+import org.apache.flink.ml.common.LabeledVector
+
+package object ml {
+
+  /** Pimp my [[ExecutionEnvironment]] to directly support `readLibSVM`
+    *
+    * @param executionEnvironment
+    */
+  implicit class RichExecutionEnvironment(executionEnvironment: ExecutionEnvironment) {
+    def readLibSVM(path: String): DataSet[LabeledVector] = {
+      MLUtils.readLibSVM(executionEnvironment, path)
+    }
+  }
+
+  /** Pimp my [[DataSet]] to directly support `writeAsLibSVM`
+    *
+    * @param dataSet
+    */
+  implicit class RichDataSet(dataSet: DataSet[LabeledVector]) {
+    def writeAsLibSVM(path: String): DataSink[String] = {
+      MLUtils.writeLibSVM(path, dataSet)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala
index 9768cce..076156d 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala
@@ -441,7 +441,7 @@ Transformer[ Vector, LabeledVector ] {
 
       val prediction = dotProduct + weight0
 
-      LabeledVector(value, prediction)
+      LabeledVector(prediction, value)
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala
new file mode 100644
index 0000000..47682ce
--- /dev/null
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml
+
+import java.io.File
+
+import scala.io.Source
+
+import org.scalatest.{FlatSpec, Matchers}
+
+import org.apache.flink.api.scala._
+import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.ml.common.LabeledVector
+import org.apache.flink.ml.math.SparseVector
+import org.apache.flink.test.util.FlinkTestBase
+import org.apache.flink.testutils.TestFileUtils
+
+class MLUtilsSuite extends FlatSpec with Matchers with FlinkTestBase {
+
+  behavior of "The RichExecutionEnvironment"
+
+  it should "read a libSVM/SVMLight input file" in {
+    val env = ExecutionEnvironment.getExecutionEnvironment
+
+    val content =
+      """
+        |1 2:10.0 4:4.5 8:4.2 # foo
+        |-1 1:9.0 4:-4.5 7:2.4 # bar
+        |0.4 3:1.0 8:-5.6 10:1.0
+        |-42.1 2:2.0 4:-6.1 3:5.1 # svm
+      """.stripMargin
+
+    val expectedLabeledVectors = Set(
+      LabeledVector(1, SparseVector.fromCOO(10, (1, 10), (3, 4.5), (7, 4.2))),
+      LabeledVector(-1, SparseVector.fromCOO(10, (0, 9), (3, -4.5), (6, 2.4))),
+      LabeledVector(0.4, SparseVector.fromCOO(10, (2, 1), (7, -5.6), (9, 1))),
+      LabeledVector(-42.1, SparseVector.fromCOO(10, (1, 2), (3, -6.1), (2, 5.1)))
+    )
+
+    val inputFilePath = TestFileUtils.createTempFile(content)
+
+    val svmInput = env.readLibSVM(inputFilePath)
+
+    val labeledVectors = svmInput.collect
+
+    labeledVectors.size should be(expectedLabeledVectors.size)
+
+    for(lVector <- labeledVectors) {
+      expectedLabeledVectors.contains(lVector) should be(true)
+    }
+
+  }
+
+  it should "write a libSVM/SVMLight output file" in {
+    val env = ExecutionEnvironment.getExecutionEnvironment
+
+    val labeledVectors = Seq(
+      LabeledVector(1.0, SparseVector.fromCOO(10, (1, 10), (3, 4.5), (7, 4.2))),
+      LabeledVector(-1.0, SparseVector.fromCOO(10, (0, 9), (3, -4.5), (6, 2.4))),
+      LabeledVector(0.4, SparseVector.fromCOO(10, (2, 1), (7, -5.6), (9, 1))),
+      LabeledVector(-42.1, SparseVector.fromCOO(10, (1, 2), (3, -6.1), (2, 5.1)))
+    )
+
+    val expectedLines = List(
+      "1.0 2:10.0 4:4.5 8:4.2",
+      "-1.0 1:9.0 4:-4.5 7:2.4",
+      "0.4 3:1.0 8:-5.6 10:1.0",
+      "-42.1 2:2.0 3:5.1 4:-6.1"
+    )
+
+    val labeledVectorsDS = env.fromCollection(labeledVectors)
+
+    val tempDir = new File(System.getProperty("java.io.tmpdir"));
+
+    val tempFile = new File(tempDir, TestFileUtils.randomFileName())
+
+    val outputFilePath = tempFile.getAbsolutePath
+
+    labeledVectorsDS.writeAsLibSVM(outputFilePath)
+
+    env.execute()
+
+    val src = Source.fromFile(tempFile)
+
+    var counter = 0
+
+    for(l <- src.getLines()) {
+      expectedLines.exists(_.equals(l)) should be(true)
+      counter += 1
+    }
+
+    counter should be(expectedLines.size)
+
+    tempFile.delete()
+  }
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala
index 5529e17..0b321ff 100644
--- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala
@@ -39,8 +39,8 @@ class PolynomialBaseITSuite
     env.setParallelism (2)
 
     val input = Seq (
-    LabeledVector (DenseVector (1), 1.0),
-    LabeledVector (DenseVector (2), 2.0)
+    LabeledVector (1.0, DenseVector (1)),
+    LabeledVector (2.0, DenseVector (2))
     )
 
     val inputDS = env.fromCollection (input)
@@ -69,8 +69,8 @@ class PolynomialBaseITSuite
     env.setParallelism(2)
 
     val input = Seq(
-      LabeledVector(DenseVector(2, 3), 1.0),
-      LabeledVector(DenseVector(2, 3, 4), 2.0)
+      LabeledVector(1.0, DenseVector(2, 3)),
+      LabeledVector(2.0, DenseVector(2, 3, 4))
     )
 
     val expectedMap = List(
@@ -100,8 +100,8 @@ class PolynomialBaseITSuite
     env.setParallelism(2)
 
     val input = Seq(
-      LabeledVector(DenseVector(2, 3), 1.0),
-      LabeledVector(DenseVector(2, 3, 4), 2.0)
+      LabeledVector(1.0, DenseVector(2, 3)),
+      LabeledVector(2.0, DenseVector(2, 3, 4))
     )
 
     val inputDS = env.fromCollection(input)

http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala
index 349f14f..82b4cc3 100644
--- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala
@@ -28,46 +28,46 @@ object RegressionData {
   val expectedSquaredResidualSum: Double = 49.7596
 
   val data: Seq[LabeledVector] = Seq(
-    LabeledVector(DenseVector(0.2714), 10.7949),
-    LabeledVector(DenseVector(0.1008), 10.6426),
-    LabeledVector(DenseVector(0.5078), 10.5603),
-    LabeledVector(DenseVector(0.5856), 12.8707),
-    LabeledVector(DenseVector(0.7629), 10.7026),
-    LabeledVector(DenseVector(0.0830), 9.8571),
-    LabeledVector(DenseVector(0.6616), 10.5001),
-    LabeledVector(DenseVector(0.5170), 11.2063),
-    LabeledVector(DenseVector(0.1710), 9.1892),
-    LabeledVector(DenseVector(0.9386), 12.2408),
-    LabeledVector(DenseVector(0.5905), 11.0307),
-    LabeledVector(DenseVector(0.4406), 10.1369),
-    LabeledVector(DenseVector(0.9419), 10.7609),
-    LabeledVector(DenseVector(0.6559), 12.5328),
-    LabeledVector(DenseVector(0.4519), 13.3560),
-    LabeledVector(DenseVector(0.8397), 14.7424),
-    LabeledVector(DenseVector(0.5326), 11.1057),
-    LabeledVector(DenseVector(0.5539), 11.6157),
-    LabeledVector(DenseVector(0.6801), 11.5744),
-    LabeledVector(DenseVector(0.3672), 11.1775),
-    LabeledVector(DenseVector(0.2393), 9.7991),
-    LabeledVector(DenseVector(0.5789), 9.8173),
-    LabeledVector(DenseVector(0.8669), 12.5642),
-    LabeledVector(DenseVector(0.4068), 9.9952),
-    LabeledVector(DenseVector(0.1126), 8.4354),
-    LabeledVector(DenseVector(0.4438), 13.7058),
-    LabeledVector(DenseVector(0.3002), 10.6672),
-    LabeledVector(DenseVector(0.4014), 11.6080),
-    LabeledVector(DenseVector(0.8334), 13.6926),
-    LabeledVector(DenseVector(0.4036), 9.5261),
-    LabeledVector(DenseVector(0.3902), 11.5837),
-    LabeledVector(DenseVector(0.3604), 11.5831),
-    LabeledVector(DenseVector(0.1403), 10.5038),
-    LabeledVector(DenseVector(0.2601), 10.9382),
-    LabeledVector(DenseVector(0.0868), 9.7325),
-    LabeledVector(DenseVector(0.4294), 12.0113),
-    LabeledVector(DenseVector(0.2573), 9.9219),
-    LabeledVector(DenseVector(0.2976), 10.0963),
-    LabeledVector(DenseVector(0.4249), 11.9999),
-    LabeledVector(DenseVector(0.1192), 12.0442)
+    LabeledVector(10.7949, DenseVector(0.2714)),
+    LabeledVector(10.6426, DenseVector(0.1008)),
+    LabeledVector(10.5603, DenseVector(0.5078)),
+    LabeledVector(12.8707, DenseVector(0.5856)),
+    LabeledVector(10.7026, DenseVector(0.7629)),
+    LabeledVector(9.8571, DenseVector(0.0830)),
+    LabeledVector(10.5001, DenseVector(0.6616)),
+    LabeledVector(11.2063, DenseVector(0.5170)),
+    LabeledVector(9.1892, DenseVector(0.1710)),
+    LabeledVector(12.2408, DenseVector(0.9386)),
+    LabeledVector(11.0307, DenseVector(0.5905)),
+    LabeledVector(10.1369, DenseVector(0.4406)),
+    LabeledVector(10.7609, DenseVector(0.9419)),
+    LabeledVector(12.5328, DenseVector(0.6559)),
+    LabeledVector(13.3560, DenseVector(0.4519)),
+    LabeledVector(14.7424, DenseVector(0.8397)),
+    LabeledVector(11.1057, DenseVector(0.5326)),
+    LabeledVector(11.6157, DenseVector(0.5539)),
+    LabeledVector(11.5744, DenseVector(0.6801)),
+    LabeledVector(11.1775, DenseVector(0.3672)),
+    LabeledVector(9.7991, DenseVector(0.2393)),
+    LabeledVector(9.8173, DenseVector(0.5789)),
+    LabeledVector(12.5642, DenseVector(0.8669)),
+    LabeledVector(9.9952, DenseVector(0.4068)),
+    LabeledVector(8.4354, DenseVector(0.1126)),
+    LabeledVector(13.7058, DenseVector(0.4438)),
+    LabeledVector(10.6672, DenseVector(0.3002)),
+    LabeledVector(11.6080, DenseVector(0.4014)),
+    LabeledVector(13.6926, DenseVector(0.8334)),
+    LabeledVector(9.5261, DenseVector(0.4036)),
+    LabeledVector(11.5837, DenseVector(0.3902)),
+    LabeledVector(11.5831, DenseVector(0.3604)),
+    LabeledVector(10.5038, DenseVector(0.1403)),
+    LabeledVector(10.9382, DenseVector(0.2601)),
+    LabeledVector(9.7325, DenseVector(0.0868)),
+    LabeledVector(12.0113, DenseVector(0.4294)),
+    LabeledVector(9.9219, DenseVector(0.2573)),
+    LabeledVector(10.0963, DenseVector(0.2976)),
+    LabeledVector(11.9999, DenseVector(0.4249)),
+    LabeledVector(12.0442, DenseVector(0.1192))
   )
 
   val expectedPolynomialWeights = Seq(0.2375, -0.3493, -0.1674)
@@ -75,55 +75,55 @@ object RegressionData {
   val expectedPolynomialSquaredResidualSum = 1.5389e+03
 
   val polynomialData: Seq[LabeledVector] = Seq(
-    LabeledVector(DenseVector(3.6663), 2.1415),
-    LabeledVector(DenseVector(4.0761), 10.9835),
-    LabeledVector(DenseVector(0.5714), 7.2507),
-    LabeledVector(DenseVector(4.1102), 11.9274),
-    LabeledVector(DenseVector(2.8456), -4.2798),
-    LabeledVector(DenseVector(0.4389), 7.1929),
-    LabeledVector(DenseVector(1.2532), 4.5097),
-    LabeledVector(DenseVector(2.4610), -3.6059),
-    LabeledVector(DenseVector(4.3088), 18.1132),
-    LabeledVector(DenseVector(4.3420), 19.2674),
-    LabeledVector(DenseVector(0.7093), 7.0664),
-    LabeledVector(DenseVector(4.3677), 20.1836),
-    LabeledVector(DenseVector(4.3073), 18.0609),
-    LabeledVector(DenseVector(2.1842), -2.2090),
-    LabeledVector(DenseVector(3.6013), 1.1306),
-    LabeledVector(DenseVector(0.6385), 7.1903),
-    LabeledVector(DenseVector(1.8979), -0.2668),
-    LabeledVector(DenseVector(4.1208), 12.2281),
-    LabeledVector(DenseVector(3.5649), 0.6086),
-    LabeledVector(DenseVector(4.3177), 18.4202),
-    LabeledVector(DenseVector(2.9508), -4.1284),
-    LabeledVector(DenseVector(0.1607), 6.1964),
-    LabeledVector(DenseVector(3.8211), 4.9638),
-    LabeledVector(DenseVector(4.2030), 14.6677),
-    LabeledVector(DenseVector(3.0543), -3.8132),
-    LabeledVector(DenseVector(3.4098), -1.2891),
-    LabeledVector(DenseVector(3.3441), -1.9390),
-    LabeledVector(DenseVector(1.7650), 0.7293),
-    LabeledVector(DenseVector(2.9497), -4.1310),
-    LabeledVector(DenseVector(0.7703), 6.9131),
-    LabeledVector(DenseVector(3.1772), -3.2060),
-    LabeledVector(DenseVector(0.1432), 6.0899),
-    LabeledVector(DenseVector(1.2462), 4.5567),
-    LabeledVector(DenseVector(0.2078), 6.4562),
-    LabeledVector(DenseVector(0.4371), 7.1903),
-    LabeledVector(DenseVector(3.7056), 2.8017),
-    LabeledVector(DenseVector(3.1267), -3.4873),
-    LabeledVector(DenseVector(1.4269), 3.2918),
-    LabeledVector(DenseVector(4.2760), 17.0085),
-    LabeledVector(DenseVector(0.1550), 6.1622),
-    LabeledVector(DenseVector(1.9743), -0.8192),
-    LabeledVector(DenseVector(1.7170), 1.0957),
-    LabeledVector(DenseVector(3.4448), -0.9065),
-    LabeledVector(DenseVector(3.5784), 0.7986),
-    LabeledVector(DenseVector(0.8409), 6.6861),
-    LabeledVector(DenseVector(2.2039), -2.3274),
-    LabeledVector(DenseVector(2.0051), -1.0359),
-    LabeledVector(DenseVector(2.9084), -4.2092),
-    LabeledVector(DenseVector(3.1921), -3.1140),
-    LabeledVector(DenseVector(3.3961), -1.4323)
+    LabeledVector(2.1415, DenseVector(3.6663)),
+    LabeledVector(10.9835, DenseVector(4.0761)),
+    LabeledVector(7.2507, DenseVector(0.5714)),
+    LabeledVector(11.9274, DenseVector(4.1102)),
+    LabeledVector(-4.2798, DenseVector(2.8456)),
+    LabeledVector(7.1929, DenseVector(0.4389)),
+    LabeledVector(4.5097, DenseVector(1.2532)),
+    LabeledVector(-3.6059, DenseVector(2.4610)),
+    LabeledVector(18.1132, DenseVector(4.3088)),
+    LabeledVector(19.2674, DenseVector(4.3420)),
+    LabeledVector(7.0664, DenseVector(0.7093)),
+    LabeledVector(20.1836, DenseVector(4.3677)),
+    LabeledVector(18.0609, DenseVector(4.3073)),
+    LabeledVector(-2.2090, DenseVector(2.1842)),
+    LabeledVector(1.1306, DenseVector(3.6013)),
+    LabeledVector(7.1903, DenseVector(0.6385)),
+    LabeledVector(-0.2668, DenseVector(1.8979)),
+    LabeledVector(12.2281, DenseVector(4.1208)),
+    LabeledVector(0.6086, DenseVector(3.5649)),
+    LabeledVector(18.4202, DenseVector(4.3177)),
+    LabeledVector(-4.1284, DenseVector(2.9508)),
+    LabeledVector(6.1964, DenseVector(0.1607)),
+    LabeledVector(4.9638, DenseVector(3.8211)),
+    LabeledVector(14.6677, DenseVector(4.2030)),
+    LabeledVector(-3.8132, DenseVector(3.0543)),
+    LabeledVector(-1.2891, DenseVector(3.4098)),
+    LabeledVector(-1.9390, DenseVector(3.3441)),
+    LabeledVector(0.7293, DenseVector(1.7650)),
+    LabeledVector(-4.1310, DenseVector(2.9497)),
+    LabeledVector(6.9131, DenseVector(0.7703)),
+    LabeledVector(-3.2060, DenseVector(3.1772)),
+    LabeledVector(6.0899, DenseVector(0.1432)),
+    LabeledVector(4.5567, DenseVector(1.2462)),
+    LabeledVector(6.4562, DenseVector(0.2078)),
+    LabeledVector(7.1903, DenseVector(0.4371)),
+    LabeledVector(2.8017, DenseVector(3.7056)),
+    LabeledVector(-3.4873, DenseVector(3.1267)),
+    LabeledVector(3.2918, DenseVector(1.4269)),
+    LabeledVector(17.0085, DenseVector(4.2760)),
+    LabeledVector(6.1622, DenseVector(0.1550)),
+    LabeledVector(-0.8192, DenseVector(1.9743)),
+    LabeledVector(1.0957, DenseVector(1.7170)),
+    LabeledVector(-0.9065, DenseVector(3.4448)),
+    LabeledVector(0.7986, DenseVector(3.5784)),
+    LabeledVector(6.6861, DenseVector(0.8409)),
+    LabeledVector(-2.3274, DenseVector(2.2039)),
+    LabeledVector(-1.0359, DenseVector(2.0051)),
+    LabeledVector(-4.2092, DenseVector(2.9084)),
+    LabeledVector(-3.1140, DenseVector(3.1921)),
+    LabeledVector(-1.4323, DenseVector(3.3961))
   )
 }


Mime
View raw message