spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From srowen <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-13435] [MLlib] Add Weighted Cohen's kap...
Date Mon, 22 Feb 2016 15:29:00 GMT
Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11303#discussion_r53640132
  
    --- Diff: mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
---
    @@ -129,86 +135,199 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double,
Doubl
       }
     
       /**
    -   * Returns f1-measure for a given label (category)
    -   * @param label the label.
    -   */
    +    * Returns f1-measure for a given label (category)
    +    *
    +    * @param label the label.
    +    */
       @Since("1.1.0")
       def fMeasure(label: Double): Double = fMeasure(label, 1.0)
     
       /**
    -   * Returns precision
    -   */
    +    * Returns precision
    +    */
       @Since("1.1.0")
       lazy val precision: Double = tpByClass.values.sum.toDouble / labelCount
     
       /**
    -   * Returns recall
    -   * (equals to precision for multiclass classifier
    -   * because sum of all false positives is equal to sum
    -   * of all false negatives)
    -   */
    +    * Returns recall
    +    * (equals to precision for multiclass classifier
    +    * because sum of all false positives is equal to sum
    +    * of all false negatives)
    +    */
       @Since("1.1.0")
       lazy val recall: Double = precision
     
       /**
    -   * Returns f-measure
    -   * (equals to precision and recall because precision equals recall)
    -   */
    +    * Returns f-measure
    +    * (equals to precision and recall because precision equals recall)
    +    */
       @Since("1.1.0")
       lazy val fMeasure: Double = precision
     
       /**
    -   * Returns weighted true positive rate
    -   * (equals to precision, recall and f-measure)
    -   */
    +    * Returns weighted true positive rate
    +    * (equals to precision, recall and f-measure)
    +    */
       @Since("1.1.0")
       lazy val weightedTruePositiveRate: Double = weightedRecall
     
       /**
    -   * Returns weighted false positive rate
    -   */
    +    * Returns weighted false positive rate
    +    */
       @Since("1.1.0")
       lazy val weightedFalsePositiveRate: Double = labelCountByClass.map { case (category,
count) =>
         falsePositiveRate(category) * count.toDouble / labelCount
       }.sum
     
       /**
    -   * Returns weighted averaged recall
    -   * (equals to precision, recall and f-measure)
    -   */
    +    * Returns weighted averaged recall
    +    * (equals to precision, recall and f-measure)
    +    */
       @Since("1.1.0")
       lazy val weightedRecall: Double = labelCountByClass.map { case (category, count) =>
         recall(category) * count.toDouble / labelCount
       }.sum
     
       /**
    -   * Returns weighted averaged precision
    -   */
    +    * Returns weighted averaged precision
    +    */
       @Since("1.1.0")
       lazy val weightedPrecision: Double = labelCountByClass.map { case (category, count)
=>
         precision(category) * count.toDouble / labelCount
       }.sum
     
       /**
    -   * Returns weighted averaged f-measure
    -   * @param beta the beta parameter.
    -   */
    +    * Returns weighted averaged f-measure
    +    *
    +    * @param beta the beta parameter.
    +    */
       @Since("1.1.0")
       def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category,
count) =>
         fMeasure(category, beta) * count.toDouble / labelCount
       }.sum
     
       /**
    -   * Returns weighted averaged f1-measure
    -   */
    +    * Returns weighted averaged f1-measure
    +    */
       @Since("1.1.0")
       lazy val weightedFMeasure: Double = labelCountByClass.map { case (category, count)
=>
         fMeasure(category, 1.0) * count.toDouble / labelCount
       }.sum
     
       /**
    -   * Returns the sequence of labels in ascending order
    -   */
    +    * Returns the sequence of labels in ascending order
    +    */
       @Since("1.1.0")
       lazy val labels: Array[Double] = tpByClass.keys.toArray.sorted
    +
    +
    +  /**
    +    * Returns unweighted Cohen's Kappa
    +    * Cohen's kappa coefficient is a statistic which measures inter-rater
    +    * agreement for qualitative (categorical) items. It is generally thought
    +    * to be a more robust measure than simple percent agreement calculation,
    +    * since kappa takes into account the agreement occurring by chance.
    +    * The kappa score is a number between -1 and 1. Scores above 0.8 are
    +    * generally considered good agreement; zero or lower means no agreement
    +    * (practically random labels).
    +    */
    +  @Since("1.6.0")
    +  def kappa(): Double = {
    +    kappa("default")
    +  }
    +
    +  /**
    +    * Returns Cohen's Kappa with built-in weighted types
    +    *
    +    * @param weights the weighted type. "default" means no weighted;
    +    *                "linear" means linear weighted;
    +    *                "quadratic" means quadratic weighted.
    +    */
    +  @Since("1.6.0")
    +  def kappa(weights: String): Double = {
    +
    +    val func = weights match {
    +      case "default" =>
    +        (i: Int, j: Int) => {
    +          if (i == j) {
    +            0.0
    +          } else {
    +            1.0
    +          }
    +        }
    +      case "linear" =>
    +        (i: Int, j: Int) => Math.abs(i - j).toDouble
    +      case "quadratic" =>
    +        (i: Int, j: Int) => (i - j).toDouble * (i - j)
    --- End diff --
    
    Also, a tiny nit: this needlessly computes the difference `i - j` twice; it could be computed once and then squared.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA
ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message