spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jkbrad...@apache.org
Subject spark git commit: [SPARK-20043][ML] DecisionTreeModel: ImpurityCalculator builder fails for uppercase impurity type Gini
Date Tue, 28 Mar 2017 23:14:16 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 4964dbedb -> 30954806f


[SPARK-20043][ML] DecisionTreeModel: ImpurityCalculator builder fails for uppercase impurity
type Gini

Fix bug: DecisionTreeModel can't recognize Impurity "Gini" when loading

TODO:
+ [x] add unit test
+ [x] fix the bug

Author: 颜发才(Yan Facai) <facai.yan@gmail.com>

Closes #17407 from facaiy/BUG/decision_tree_loader_failer_with_Gini_impurity.

(cherry picked from commit 7d432af8f3c47973550ea253dae0c23cd2961bde)
Signed-off-by: Joseph K. Bradley <joseph@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/30954806
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/30954806
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/30954806

Branch: refs/heads/branch-2.1
Commit: 30954806f1be0dba63f0a608d824d7d811485801
Parents: 4964dbe
Author: 颜发才(Yan Facai) <facai.yan@gmail.com>
Authored: Tue Mar 28 16:14:01 2017 -0700
Committer: Joseph K. Bradley <joseph@databricks.com>
Committed: Tue Mar 28 16:14:11 2017 -0700

----------------------------------------------------------------------
 .../apache/spark/mllib/tree/impurity/Impurity.scala   |  2 +-
 .../classification/DecisionTreeClassifierSuite.scala  | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/30954806/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index a5bdc2c..98a3021 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -184,7 +184,7 @@ private[spark] object ImpurityCalculator {
    * the given stats.
    */
   def getCalculator(impurity: String, stats: Array[Double]): ImpurityCalculator = {
-    impurity match {
+    impurity.toLowerCase match {
       case "gini" => new GiniCalculator(stats)
       case "entropy" => new EntropyCalculator(stats)
       case "variance" => new VarianceCalculator(stats)

http://git-wip-us.apache.org/repos/asf/spark/blob/30954806/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index c711e7f..692a172 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -383,6 +383,20 @@ class DecisionTreeClassifierSuite
     testEstimatorAndModelReadWrite(dt, continuousData, allParamSettings ++ Map("maxDepth" -> 0),
       checkModelData)
   }
+
+  test("SPARK-20043: " +
+       "ImpurityCalculator builder fails for uppercase impurity type Gini in model read/write") {
+    val rdd = TreeTests.getTreeReadWriteData(sc)
+    val data: DataFrame =
+      TreeTests.setMetadata(rdd, Map.empty[Int, Int], numClasses = 2)
+
+    val dt = new DecisionTreeClassifier()
+      .setImpurity("Gini")
+      .setMaxDepth(2)
+    val model = dt.fit(data)
+
+    testDefaultReadWrite(model)
+  }
 }
 
 private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message