spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From WeichenXu123 <...@git.apache.org>
Subject [GitHub] spark issue #19208: [SPARK-21087] [ML] CrossValidator, TrainValidationSplit ...
Date Tue, 14 Nov 2017 07:45:46 GMT
Github user WeichenXu123 commented on the issue:

    https://github.com/apache/spark/pull/19208
  
    I manually tested backwards compatibility and it works fine. I've pasted the test code for
`CrossValidator` below.
    
    Run the following code in a Spark 2.2 shell first:
    ```
    import java.io.File
    import org.apache.spark.ml.tuning._
    import org.apache.spark.ml.{Estimator, Model, Pipeline}
    import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel,
OneVsRest}
    import org.apache.spark.ml.feature.HashingTF
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.ml.param.ParamMap
    import org.apache.spark.ml.regression.LinearRegression
    import org.apache.spark.sql.Dataset
    import org.apache.spark.sql.types.StructType
    import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, MulticlassClassificationEvaluator,
RegressionEvaluator}
    import org.apache.spark.ml.feature.{Instance, LabeledPoint}
    
    /**
     * Synthesizes `nPoints` one-dimensional binary-classification examples.
     * Each feature is drawn from a standard Gaussian; its label is sampled as 1.0
     * with probability sigmoid(offset + scale * x), otherwise 0.0.
     *
     * @param offset  intercept term of the underlying logistic model
     * @param scale   slope term of the underlying logistic model
     * @param nPoints number of examples to generate
     * @param seed    seed for the deterministic RNG
     * @return the generated labeled points
     */
    def generateLogisticInput(offset: Double, scale: Double, nPoints: Int, seed: Int): Seq[LabeledPoint] = {
      val rng = new java.util.Random(seed)
      // Draw every feature before any label so the RNG consumption order
      // (nPoints gaussians, then nPoints uniforms) matches the original exactly.
      val features = Array.fill[Double](nPoints)(rng.nextGaussian())
      val labels = features.map { x =>
        val prob = 1.0 / (1.0 + math.exp(-(offset + scale * x)))
        if (rng.nextDouble() < prob) 1.0 else 0.0
      }
      labels.zip(features).map { case (label, x) =>
        LabeledPoint(label, Vectors.dense(Array(x)))
      }.toSeq
    }
    import spark.implicits._
    // Build a small synthetic binary-classification DataFrame (10 rows, 2 partitions).
    val df = sc.parallelize(generateLogisticInput(0.0, 1.0, 10, 42), 2).toDF()
    val lrEstimator = new LogisticRegression
    // 2 x 2 parameter grid over regularization strength and iteration count.
    val grid = new ParamGridBuilder()
      .addGrid(lrEstimator.regParam, Array(0.001, 1000.0))
      .addGrid(lrEstimator.maxIter, Array(0, 3))
      .build()
    val evaluator = new BinaryClassificationEvaluator
    val folds = 3
    val validator = new CrossValidator()
      .setEstimator(lrEstimator)
      .setEstimatorParamMaps(grid)
      .setEvaluator(evaluator)
      .setNumFolds(folds)
    // Fit with Spark 2.2 and persist to disk so the patched build can reload it.
    val fittedModel = validator.fit(df)
    fittedModel.save("file:///Users/weichenxu/work/test/s1")
    ```
    
    and then run the following code against the current PR (in spark-shell):
    ```
    // Reload the model that was saved by the Spark 2.2 shell using this PR's code,
    // then check whether the loaded model reports persisted sub-models.
    val loaded = org.apache.spark.ml.tuning.CrossValidatorModel.load("file:///Users/weichenxu/work/test/s1")
    loaded.hasSubModels
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message