spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject spark git commit: [SPARK-16800][EXAMPLES][ML] Fix Java examples that fail to run due to exception
Date Sat, 30 Jul 2016 15:08:42 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 7d87fc964 -> 26da5a7fc


[SPARK-16800][EXAMPLES][ML] Fix Java examples that fail to run due to exception

## What changes were proposed in this pull request?
Some Java examples use mllib.linalg.Vectors instead of ml.linalg.Vectors, which causes
an exception when run.  In addition, some Java examples incorrectly specify data types
in the schema, also causing an exception.

## How was this patch tested?
Ran corrected examples locally

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #14405 from BryanCutler/java-examples-ml.Vectors-fix-SPARK-16800.

(cherry picked from commit a6290e51e402e8434d6207d553db1f551e714fde)
Signed-off-by: Sean Owen <sowen@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26da5a7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26da5a7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26da5a7f

Branch: refs/heads/branch-2.0
Commit: 26da5a7fc37ac961e7b4d8f423e8e58aefb5c2bc
Parents: 7d87fc9
Author: Bryan Cutler <cutlerb@gmail.com>
Authored: Sat Jul 30 08:08:33 2016 -0700
Committer: Sean Owen <sowen@cloudera.com>
Committed: Sat Jul 30 08:08:40 2016 -0700

----------------------------------------------------------------------
 .../ml/JavaAFTSurvivalRegressionExample.java    |  8 +++-
 .../spark/examples/ml/JavaBinarizerExample.java |  2 +-
 .../examples/ml/JavaChiSqSelectorExample.java   |  4 +-
 .../spark/examples/ml/JavaDCTExample.java       |  4 +-
 .../JavaEstimatorTransformerParamExample.java   | 43 ++++++++++++--------
 ...vaLinearRegressionWithElasticNetExample.java |  2 +-
 .../examples/ml/JavaOneHotEncoderExample.java   |  2 +-
 .../spark/examples/ml/JavaPCAExample.java       |  4 +-
 .../ml/JavaPolynomialExpansionExample.java      |  4 +-
 .../spark/examples/ml/JavaTfIdfExample.java     |  8 ++--
 .../examples/ml/JavaVectorAssemblerExample.java |  4 +-
 .../examples/ml/JavaVectorSlicerExample.java    |  2 +-
 12 files changed, 49 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
index b011575..3f03458 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
@@ -23,12 +23,16 @@ import java.util.List;
 
 import org.apache.spark.ml.regression.AFTSurvivalRegression;
 import org.apache.spark.ml.regression.AFTSurvivalRegressionModel;
-import org.apache.spark.mllib.linalg.*;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.types.*;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
 // $example off$
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
index 5f964ac..a954dbd 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
@@ -47,7 +47,7 @@ public class JavaBinarizerExample {
       RowFactory.create(2, 0.2)
     );
     StructType schema = new StructType(new StructField[]{
-      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
       new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
     });
     Dataset<Row> continuousDataFrame = spark.createDataFrame(data, schema);

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
index f8f2fb1..fcf90d8 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
@@ -25,8 +25,8 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.spark.ml.feature.ChiSqSelector;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.types.DataTypes;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
index eee92c7..66ce23b 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
@@ -25,8 +25,8 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.spark.ml.feature.DCT;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.types.Metadata;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
index 889f578..9e07a0c 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
@@ -19,16 +19,20 @@ package org.apache.spark.examples.ml;
 
 // $example on$
 import java.util.Arrays;
-// $example off$
+import java.util.List;
 
-// $example on$
 import org.apache.spark.ml.classification.LogisticRegression;
 import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.ml.param.ParamMap;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
 // $example off$
 import org.apache.spark.sql.SparkSession;
 
@@ -44,15 +48,17 @@ public class JavaEstimatorTransformerParamExample {
 
     // $example on$
     // Prepare training data.
-    // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
into
-    // DataFrames, where it uses the bean metadata to infer the schema.
-    Dataset<Row> training = spark.createDataFrame(
-      Arrays.asList(
-        new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
-        new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
-        new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
-        new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))
-      ), LabeledPoint.class);
+    List<Row> dataTraining = Arrays.asList(
+        RowFactory.create(1.0, Vectors.dense(0.0, 1.1, 0.1)),
+        RowFactory.create(0.0, Vectors.dense(2.0, 1.0, -1.0)),
+        RowFactory.create(0.0, Vectors.dense(2.0, 1.3, 1.0)),
+        RowFactory.create(1.0, Vectors.dense(0.0, 1.2, -0.5))
+    );
+    StructType schema = new StructType(new StructField[]{
+        new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+        new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> training = spark.createDataFrame(dataTraining, schema);
 
     // Create a LogisticRegression instance. This instance is an Estimator.
     LogisticRegression lr = new LogisticRegression();
@@ -87,11 +93,12 @@ public class JavaEstimatorTransformerParamExample {
     System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
 
     // Prepare test documents.
-    Dataset<Row> test = spark.createDataFrame(Arrays.asList(
-      new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
-      new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
-      new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))
-    ), LabeledPoint.class);
+    List<Row> dataTest = Arrays.asList(
+        RowFactory.create(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
+        RowFactory.create(0.0, Vectors.dense(3.0, 2.0, -0.1)),
+        RowFactory.create(1.0, Vectors.dense(0.0, 2.2, -1.5))
+    );
+    Dataset<Row> test = spark.createDataFrame(dataTest, schema);
 
     // Make predictions on test documents using the Transformer.transform() method.
     // LogisticRegression.transform will only use the 'features' column.

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
index dcd209e..a561b6d 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
@@ -21,7 +21,7 @@ package org.apache.spark.examples.ml;
 import org.apache.spark.ml.regression.LinearRegression;
 import org.apache.spark.ml.regression.LinearRegressionModel;
 import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
index 5d29e54..a15e5f8 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
@@ -53,7 +53,7 @@ public class JavaOneHotEncoderExample {
     );
 
     StructType schema = new StructType(new StructField[]{
-      new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
       new StructField("category", DataTypes.StringType, false, Metadata.empty())
     });
 

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
index ffa979e..d597a9a 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -25,8 +25,8 @@ import java.util.List;
 
 import org.apache.spark.ml.feature.PCA;
 import org.apache.spark.ml.feature.PCAModel;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
index 7afcd0e..67180df 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
@@ -24,8 +24,8 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.spark.ml.feature.PolynomialExpansion;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
index 6e07539..800e42c 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
@@ -25,7 +25,7 @@ import org.apache.spark.ml.feature.HashingTF;
 import org.apache.spark.ml.feature.IDF;
 import org.apache.spark.ml.feature.IDFModel;
 import org.apache.spark.ml.feature.Tokenizer;
-import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.ml.linalg.Vector;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
@@ -45,9 +45,9 @@ public class JavaTfIdfExample {
 
     // $example on$
     List<Row> data = Arrays.asList(
-      RowFactory.create(0, "Hi I heard about Spark"),
-      RowFactory.create(0, "I wish Java could use case classes"),
-      RowFactory.create(1, "Logistic regression models are neat")
+      RowFactory.create(0.0, "Hi I heard about Spark"),
+      RowFactory.create(0.0, "I wish Java could use case classes"),
+      RowFactory.create(1.0, "Logistic regression models are neat")
     );
     StructType schema = new StructType(new StructField[]{
       new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
index 41f1d87..9bb0f93 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
@@ -23,8 +23,8 @@ import org.apache.spark.sql.SparkSession;
 import java.util.Arrays;
 
 import org.apache.spark.ml.feature.VectorAssembler;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;

http://git-wip-us.apache.org/repos/asf/spark/blob/26da5a7f/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
index 24959c0..19b8bc8 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
@@ -28,7 +28,7 @@ import org.apache.spark.ml.attribute.Attribute;
 import org.apache.spark.ml.attribute.AttributeGroup;
 import org.apache.spark.ml.attribute.NumericAttribute;
 import org.apache.spark.ml.feature.VectorSlicer;
-import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.ml.linalg.Vectors;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message