spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject [1/5] spark git commit: [SPARK-16421][EXAMPLES][ML] Improve ML Example Outputs
Date Fri, 05 Aug 2016 19:57:48 GMT
Repository: spark
Updated Branches:
  refs/heads/master 2460f03ff -> 180fd3e0a


http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
index 75ba33a..989d250 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
@@ -20,6 +20,7 @@ package org.apache.spark.examples.ml
 
 // $example on$
 import org.apache.spark.ml.feature.Normalizer
+import org.apache.spark.ml.linalg.Vectors
 // $example off$
 import org.apache.spark.sql.SparkSession
 
@@ -31,7 +32,11 @@ object NormalizerExample {
       .getOrCreate()
 
     // $example on$
-    val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    val dataFrame = spark.createDataFrame(Seq(
+      (0, Vectors.dense(1.0, 0.5, -1.0)),
+      (1, Vectors.dense(2.0, 1.0, 1.0)),
+      (2, Vectors.dense(4.0, 10.0, 2.0))
+    )).toDF("id", "features")
 
     // Normalize each Vector using $L^1$ norm.
     val normalizer = new Normalizer()
@@ -40,10 +45,12 @@ object NormalizerExample {
       .setP(1.0)
 
     val l1NormData = normalizer.transform(dataFrame)
+    println("Normalized using L^1 norm")
     l1NormData.show()
 
     // Normalize each Vector using $L^\infty$ norm.
     val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
+    println("Normalized using L^inf norm")
     lInfNormData.show()
     // $example off$
 

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
index 4aa649b..274cc12 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
@@ -49,8 +49,9 @@ object OneHotEncoderExample {
     val encoder = new OneHotEncoder()
       .setInputCol("categoryIndex")
       .setOutputCol("categoryVec")
+
     val encoded = encoder.transform(indexed)
-    encoded.select("id", "categoryVec").show()
+    encoded.show()
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
index acde110..4ad6c7c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
@@ -69,7 +69,7 @@ object OneVsRestExample {
 
     // compute the classification error on test data.
     val accuracy = evaluator.evaluate(predictions)
-    println(s"Test Error : ${1 - accuracy}")
+    println(s"Test Error = ${1 - accuracy}")
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
index dca96ee..4e1d7cd 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
@@ -38,14 +38,15 @@ object PCAExample {
       Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
     )
     val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
     val pca = new PCA()
       .setInputCol("features")
       .setOutputCol("pcaFeatures")
       .setK(3)
       .fit(df)
-    val pcaDF = pca.transform(df)
-    val result = pcaDF.select("pcaFeatures")
-    result.show()
+
+    val result = pca.transform(df).select("pcaFeatures")
+    result.show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
index 54d2e6b..f117b03 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
@@ -33,17 +33,19 @@ object PolynomialExpansionExample {
 
     // $example on$
     val data = Array(
-      Vectors.dense(-2.0, 2.3),
+      Vectors.dense(2.0, 1.0),
       Vectors.dense(0.0, 0.0),
-      Vectors.dense(0.6, -1.1)
+      Vectors.dense(3.0, -1.0)
     )
     val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-    val polynomialExpansion = new PolynomialExpansion()
+
+    val polyExpansion = new PolynomialExpansion()
       .setInputCol("features")
       .setOutputCol("polyFeatures")
       .setDegree(3)
-    val polyDF = polynomialExpansion.transform(df)
-    polyDF.select("polyFeatures").take(3).foreach(println)
+
+    val polyDF = polyExpansion.transform(df)
+    polyDF.show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
index a56de08..369a6ff 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
@@ -40,7 +40,7 @@ object StopWordsRemoverExample {
       (1, Seq("Mary", "had", "a", "little", "lamb"))
     )).toDF("id", "raw")
 
-    remover.transform(dataSet).show()
+    remover.transform(dataSet).show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
index 97f6fcc..ec2df2e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
@@ -33,9 +33,9 @@ object TfIdfExample {
 
     // $example on$
     val sentenceData = spark.createDataFrame(Seq(
-      (0, "Hi I heard about Spark"),
-      (0, "I wish Java could use case classes"),
-      (1, "Logistic regression models are neat")
+      (0.0, "Hi I heard about Spark"),
+      (0.0, "I wish Java could use case classes"),
+      (1.0, "Logistic regression models are neat")
     )).toDF("label", "sentence")
 
     val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
@@ -51,7 +51,7 @@ object TfIdfExample {
     val idfModel = idf.fit(featurizedData)
 
     val rescaledData = idfModel.transform(featurizedData)
-    rescaledData.select("features", "label").take(3).foreach(println)
+    rescaledData.select("label", "features").show()
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
index 90d0faa..0167dc3 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -20,6 +20,7 @@ package org.apache.spark.examples.ml
 
 // $example on$
 import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
+import org.apache.spark.sql.functions._
 // $example off$
 import org.apache.spark.sql.SparkSession
 
@@ -35,7 +36,7 @@ object TokenizerExample {
       (0, "Hi I heard about Spark"),
       (1, "I wish Java could use case classes"),
       (2, "Logistic,regression,models,are,neat")
-    )).toDF("label", "sentence")
+    )).toDF("id", "sentence")
 
     val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
     val regexTokenizer = new RegexTokenizer()
@@ -43,11 +44,15 @@ object TokenizerExample {
       .setOutputCol("words")
       .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
 
+    val countTokens = udf { (words: Seq[String]) => words.length }
+
     val tokenized = tokenizer.transform(sentenceDataFrame)
-    tokenized.select("words", "label").take(3).foreach(println)
+    tokenized.select("sentence", "words")
+        .withColumn("tokens", countTokens(col("words"))).show(false)
 
     val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
-    regexTokenized.select("words", "label").take(3).foreach(println)
+    regexTokenized.select("sentence", "words")
+        .withColumn("tokens", countTokens(col("words"))).show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
index 13c72f8..13b58d1 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
@@ -100,6 +100,7 @@ object UnaryTransformerExample {
     val data = spark.range(0, 5).toDF("input")
       .select(col("input").cast("double").as("input"))
     val result = myTransformer.transform(data)
+    println("Transformed by adding constant value")
     result.show()
 
     // Save and load the Transformer.
@@ -109,6 +110,7 @@ object UnaryTransformerExample {
     val sameTransformer = MyTransformer.load(dirName)
 
     // Transform the data to show the results are identical.
+    println("Same transform applied from loaded model")
     val sameResult = sameTransformer.transform(data)
     sameResult.show()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
index 8910470..3d5c7ef 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
@@ -41,7 +41,8 @@ object VectorAssemblerExample {
       .setOutputCol("features")
 
     val output = assembler.transform(dataset)
-    println(output.select("features", "clicked").first())
+    println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
index 85dd5c2..63a6091 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
@@ -37,7 +37,10 @@ object VectorSlicerExample {
       .getOrCreate()
 
     // $example on$
-    val data = Arrays.asList(Row(Vectors.dense(-2.0, 2.3, 0.0)))
+    val data = Arrays.asList(
+      Row(Vectors.sparse(3, Seq((0, -2.0), (1, 2.3)))),
+      Row(Vectors.dense(-2.0, 2.3, 0.0))
+    )
 
     val defaultAttr = NumericAttribute.defaultAttr
     val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
@@ -51,7 +54,7 @@ object VectorSlicerExample {
     // or slicer.setIndices(Array(1, 2)), or slicer.setNames(Array("f2", "f3"))
 
     val output = slicer.transform(dataset)
-    println(output.select("userFeatures", "features").first())
+    output.show(false)
     // $example off$
 
     spark.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/180fd3e0/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
index 5c8bd19..4bcc6ac 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
@@ -20,6 +20,8 @@ package org.apache.spark.examples.ml
 
 // $example on$
 import org.apache.spark.ml.feature.Word2Vec
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.sql.Row
 // $example off$
 import org.apache.spark.sql.SparkSession
 
@@ -47,7 +49,8 @@ object Word2VecExample {
     val model = word2Vec.fit(documentDF)
 
     val result = model.transform(documentDF)
-    result.select("result").take(3).foreach(println)
+    result.collect().foreach { case Row(text: Seq[_], features: Vector) =>
+      println(s"Text: [${text.mkString(", ")}] => \nVector: $features\n") }
     // $example off$
 
     spark.stop()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message