spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From m...@apache.org
Subject [3/3] spark git commit: [SPARK-9864] [DOC] [MLlib] [SQL] Replace since in scaladoc to Since annotation
Date Fri, 21 Aug 2015 21:19:31 GMT
[SPARK-9864] [DOC] [MLlib] [SQL] Replace since in scaladoc to Since annotation

Author: MechCoder <manojkumarsivaraj334@gmail.com>

Closes #8352 from MechCoder/since.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5b028ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5b028ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5b028ed

Branch: refs/heads/master
Commit: f5b028ed2f1ad6de43c8b50ebf480e1b6c047035
Parents: d89cc38
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Authored: Fri Aug 21 14:19:24 2015 -0700
Committer: Xiangrui Meng <meng@databricks.com>
Committed: Fri Aug 21 14:19:24 2015 -0700

----------------------------------------------------------------------
 .../classification/ClassificationModel.scala    |   8 +-
 .../classification/LogisticRegression.scala     |  30 +++---
 .../spark/mllib/classification/NaiveBayes.scala |   7 +-
 .../apache/spark/mllib/classification/SVM.scala |  28 ++---
 .../mllib/clustering/GaussianMixture.scala      |  28 ++---
 .../mllib/clustering/GaussianMixtureModel.scala |  28 ++---
 .../apache/spark/mllib/clustering/KMeans.scala  |  50 ++++-----
 .../spark/mllib/clustering/KMeansModel.scala    |  27 ++---
 .../org/apache/spark/mllib/clustering/LDA.scala |  56 +++++-----
 .../spark/mllib/clustering/LDAModel.scala       |  69 +++++-------
 .../spark/mllib/clustering/LDAOptimizer.scala   |  24 ++---
 .../clustering/PowerIterationClustering.scala   |  38 +++----
 .../mllib/clustering/StreamingKMeans.scala      |  35 +++---
 .../BinaryClassificationMetrics.scala           |  26 ++---
 .../mllib/evaluation/MulticlassMetrics.scala    |  20 ++--
 .../mllib/evaluation/MultilabelMetrics.scala    |   9 +-
 .../spark/mllib/evaluation/RankingMetrics.scala |  10 +-
 .../mllib/evaluation/RegressionMetrics.scala    |  14 +--
 .../spark/mllib/fpm/AssociationRules.scala      |  20 ++--
 .../org/apache/spark/mllib/fpm/FPGrowth.scala   |  22 ++--
 .../apache/spark/mllib/linalg/Matrices.scala    | 106 ++++++++-----------
 .../linalg/SingularValueDecomposition.scala     |   4 +-
 .../org/apache/spark/mllib/linalg/Vectors.scala |  90 +++++-----------
 .../mllib/linalg/distributed/BlockMatrix.scala  |  88 +++++++--------
 .../linalg/distributed/CoordinateMatrix.scala   |  40 +++----
 .../linalg/distributed/DistributedMatrix.scala  |   4 +-
 .../linalg/distributed/IndexedRowMatrix.scala   |  38 +++----
 .../mllib/linalg/distributed/RowMatrix.scala    |  39 +++----
 .../apache/spark/mllib/recommendation/ALS.scala |  22 ++--
 .../MatrixFactorizationModel.scala              |  28 +++--
 .../regression/GeneralizedLinearAlgorithm.scala |  24 ++---
 .../mllib/regression/IsotonicRegression.scala   |  22 ++--
 .../spark/mllib/regression/LabeledPoint.scala   |   7 +-
 .../apache/spark/mllib/regression/Lasso.scala   |  25 ++---
 .../mllib/regression/LinearRegression.scala     |  25 ++---
 .../mllib/regression/RegressionModel.scala      |  12 +--
 .../mllib/regression/RidgeRegression.scala      |  25 ++---
 .../regression/StreamingLinearAlgorithm.scala   |  18 ++--
 .../apache/spark/mllib/stat/KernelDensity.scala |  12 +--
 .../stat/MultivariateOnlineSummarizer.scala     |  24 ++---
 .../stat/MultivariateStatisticalSummary.scala   |  19 ++--
 .../apache/spark/mllib/stat/Statistics.scala    |  30 +++---
 .../distribution/MultivariateGaussian.scala     |   8 +-
 .../apache/spark/mllib/tree/DecisionTree.scala  |  28 +++--
 .../spark/mllib/tree/GradientBoostedTrees.scala |  20 ++--
 .../apache/spark/mllib/tree/RandomForest.scala  |  20 ++--
 .../spark/mllib/tree/configuration/Algo.scala   |   4 +-
 .../tree/configuration/BoostingStrategy.scala   |  12 +--
 .../mllib/tree/configuration/FeatureType.scala  |   4 +-
 .../tree/configuration/QuantileStrategy.scala   |   4 +-
 .../mllib/tree/configuration/Strategy.scala     |  24 ++---
 .../spark/mllib/tree/impurity/Entropy.scala     |  10 +-
 .../apache/spark/mllib/tree/impurity/Gini.scala |  10 +-
 .../spark/mllib/tree/impurity/Impurity.scala    |   8 +-
 .../spark/mllib/tree/impurity/Variance.scala    |  10 +-
 .../spark/mllib/tree/loss/AbsoluteError.scala   |   6 +-
 .../apache/spark/mllib/tree/loss/LogLoss.scala  |   6 +-
 .../org/apache/spark/mllib/tree/loss/Loss.scala |   8 +-
 .../apache/spark/mllib/tree/loss/Losses.scala   |  10 +-
 .../spark/mllib/tree/loss/SquaredError.scala    |   6 +-
 .../mllib/tree/model/DecisionTreeModel.scala    |  22 ++--
 .../mllib/tree/model/InformationGainStats.scala |   4 +-
 .../apache/spark/mllib/tree/model/Node.scala    |   8 +-
 .../apache/spark/mllib/tree/model/Predict.scala |   4 +-
 .../apache/spark/mllib/tree/model/Split.scala   |   4 +-
 .../mllib/tree/model/treeEnsembleModels.scala   |  26 +++--
 .../org/apache/spark/mllib/tree/package.scala   |   1 -
 .../org/apache/spark/mllib/util/MLUtils.scala   |  36 +++----
 68 files changed, 692 insertions(+), 862 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index ba73024..a29b425 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.classification
 
 import org.json4s.{DefaultFormats, JValue}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.rdd.RDD
@@ -36,8 +36,8 @@ trait ClassificationModel extends Serializable {
    *
    * @param testData RDD representing data points to be predicted
    * @return an RDD[Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: RDD[Vector]): RDD[Double]
 
   /**
@@ -45,16 +45,16 @@ trait ClassificationModel extends Serializable {
    *
    * @param testData array representing a single data point
    * @return predicted category from the trained model
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: Vector): Double
 
   /**
    * Predict values for examples stored in a JavaRDD.
    * @param testData JavaRDD representing data points to be predicted
    * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
     predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 268642a..e03e662 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.BLAS.dot
 import org.apache.spark.mllib.linalg.{DenseVector, Vector}
@@ -85,8 +85,8 @@ class LogisticRegressionModel (
    * in Binary Logistic Regression. An example with prediction score greater than or equal to
    * this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -97,8 +97,8 @@ class LogisticRegressionModel (
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
    * It is only used for binary classification.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
 
@@ -106,8 +106,8 @@ class LogisticRegressionModel (
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -158,9 +158,7 @@ class LogisticRegressionModel (
     }
   }
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures, numClasses, weights, intercept, threshold)
@@ -168,9 +166,7 @@ class LogisticRegressionModel (
 
   override protected def formatVersion: String = "1.0"
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
   }
@@ -178,9 +174,7 @@ class LogisticRegressionModel (
 
 object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): LogisticRegressionModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -261,8 +255,8 @@ object LogisticRegressionWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -284,8 +278,8 @@ object LogisticRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
 
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -306,8 +300,8 @@ object LogisticRegressionWithSGD {
 
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -324,8 +318,8 @@ object LogisticRegressionWithSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int): LogisticRegressionModel = {
@@ -361,8 +355,8 @@ class LogisticRegressionWithLBFGS
    * Set the number of possible outcomes for k classes classification problem in
    * Multinomial Logistic Regression.
    * By default, it is binary logistic regression so k will be set to 2.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setNumClasses(numClasses: Int): this.type = {
     require(numClasses > 1)

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 2df91c0..dab3692 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -25,6 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkContext, SparkException}
+import org.apache.spark.annotation.Since
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -444,8 +445,8 @@ object NaiveBayes {
    *
    * @param input RDD of `(label, array of features)` pairs.  Every vector should be a frequency
    *              vector or a count vector.
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint]): NaiveBayesModel = {
     new NaiveBayes().run(input)
   }
@@ -460,8 +461,8 @@ object NaiveBayes {
    * @param input RDD of `(label, array of features)` pairs.  Every vector should be a frequency
    *              vector or a count vector.
    * @param lambda The smoothing parameter
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = {
     new NaiveBayes(lambda, Multinomial).run(input)
   }
@@ -483,8 +484,8 @@ object NaiveBayes {
    *
    * @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
    *              multinomial or bernoulli
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
     require(supportedModelTypes.contains(modelType),
       s"NaiveBayes was created with an unknown modelType: $modelType.")

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 5b54fee..5f87269 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.optimization._
@@ -46,8 +46,8 @@ class SVMModel (
    * Sets the threshold that separates positive predictions from negative predictions. An example
    * with prediction score greater than or equal to this threshold is identified as an positive,
    * and negative otherwise. The default value is 0.0.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -57,16 +57,16 @@ class SVMModel (
   /**
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
 
   /**
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -84,9 +84,7 @@ class SVMModel (
     }
   }
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
@@ -94,9 +92,7 @@ class SVMModel (
 
   override protected def formatVersion: String = "1.0"
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
   }
@@ -104,9 +100,7 @@ class SVMModel (
 
 object SVMModel extends Loader[SVMModel] {
 
-   /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): SVMModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -185,8 +179,8 @@ object SVMWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -209,8 +203,8 @@ object SVMWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -231,8 +225,8 @@ object SVMWithSGD {
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -250,8 +244,8 @@ object SVMWithSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = {
     train(input, numIterations, 1.0, 0.01, 1.0)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index bc27b1f..fcc9dfe 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.IndexedSeq
 
 import breeze.linalg.{diag, DenseMatrix => BreezeMatrix, DenseVector => BDV, Vector => BV}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Matrices, Vector, Vectors}
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -62,8 +62,8 @@ class GaussianMixture private (
   /**
    * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
    * maxIterations: 100, seed: random}.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(2, 0.01, 100, Utils.random.nextLong())
 
   // number of samples per cluster to use when initializing Gaussians
@@ -77,8 +77,8 @@ class GaussianMixture private (
    * Set the initial GMM starting point, bypassing the random initialization.
    * You must call setK() prior to calling this method, and the condition
    * (model.k == this.k) must be met; failure will result in an IllegalArgumentException
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setInitialModel(model: GaussianMixtureModel): this.type = {
     if (model.k == k) {
       initialModel = Some(model)
@@ -90,14 +90,14 @@ class GaussianMixture private (
 
   /**
    * Return the user supplied initial GMM, if supplied
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getInitialModel: Option[GaussianMixtureModel] = initialModel
 
   /**
    * Set the number of Gaussians in the mixture model.  Default: 2
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -105,14 +105,14 @@ class GaussianMixture private (
 
   /**
    * Return the number of Gaussians in the mixture model
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getK: Int = k
 
   /**
    * Set the maximum number of iterations to run. Default: 100
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -120,15 +120,15 @@ class GaussianMixture private (
 
   /**
    * Return the maximum number of iterations to run
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getMaxIterations: Int = maxIterations
 
   /**
    * Set the largest change in log-likelihood at which convergence is
    * considered to have occurred.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setConvergenceTol(convergenceTol: Double): this.type = {
     this.convergenceTol = convergenceTol
     this
@@ -137,14 +137,14 @@ class GaussianMixture private (
   /**
    * Return the largest change in log-likelihood at which convergence is
    * considered to have occurred.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getConvergenceTol: Double = convergenceTol
 
   /**
    * Set the random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -152,14 +152,14 @@ class GaussianMixture private (
 
   /**
    * Return the random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getSeed: Long = seed
 
   /**
    * Perform expectation maximization
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(data: RDD[Vector]): GaussianMixtureModel = {
     val sc = data.sparkContext
 
@@ -235,8 +235,8 @@ class GaussianMixture private (
 
   /**
    * Java-friendly version of [[run()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(data: JavaRDD[Vector]): GaussianMixtureModel = run(data.rdd)
 
   private def updateWeightsAndGaussians(

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 2fa0473..1a10a8b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.{Vector, Matrices, Matrix}
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -43,8 +43,8 @@ import org.apache.spark.sql.{SQLContext, Row}
  *                the weight for Gaussian i, and weights.sum == 1
  * @param gaussians Array of MultivariateGaussian where gaussians(i) represents
  *                  the Multivariate Gaussian (Normal) Distribution for Gaussian i
- * @since 1.3.0
  */
+@Since("1.3.0")
 @Experimental
 class GaussianMixtureModel(
   val weights: Array[Double],
@@ -54,23 +54,21 @@ class GaussianMixtureModel(
 
   override protected def formatVersion = "1.0"
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians)
   }
 
   /**
    * Number of gaussians in mixture
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def k: Int = weights.length
 
   /**
    * Maps given points to their cluster indices.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def predict(points: RDD[Vector]): RDD[Int] = {
     val responsibilityMatrix = predictSoft(points)
     responsibilityMatrix.map(r => r.indexOf(r.max))
@@ -78,8 +76,8 @@ class GaussianMixtureModel(
 
   /**
    * Maps given point to its cluster index.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def predict(point: Vector): Int = {
     val r = computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
     r.indexOf(r.max)
@@ -87,16 +85,16 @@ class GaussianMixtureModel(
 
   /**
    * Java-friendly version of [[predict()]]
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
     predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
 
   /**
    * Given the input vectors, return the membership value of each vector
    * to all mixture components.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = {
     val sc = points.sparkContext
     val bcDists = sc.broadcast(gaussians)
@@ -108,8 +106,8 @@ class GaussianMixtureModel(
 
   /**
    * Given the input vector, return the membership values to all mixture components.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predictSoft(point: Vector): Array[Double] = {
     computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
   }
@@ -133,9 +131,7 @@ class GaussianMixtureModel(
   }
 }
 
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 @Experimental
 object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
 
@@ -186,9 +182,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
     }
   }
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String) : GaussianMixtureModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 9ef6834..3e9545a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.linalg.BLAS.{axpy, scal}
 import org.apache.spark.mllib.util.MLUtils
@@ -49,20 +49,20 @@ class KMeans private (
   /**
    * Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
    * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random}.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
 
   /**
    * Number of clusters to create (k).
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getK: Int = k
 
   /**
    * Set the number of clusters to create (k). Default: 2.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -70,14 +70,14 @@ class KMeans private (
 
   /**
    * Maximum number of iterations to run.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getMaxIterations: Int = maxIterations
 
   /**
    * Set maximum number of iterations to run. Default: 20.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -85,16 +85,16 @@ class KMeans private (
 
   /**
    * The initialization algorithm. This can be either "random" or "k-means||".
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getInitializationMode: String = initializationMode
 
   /**
    * Set the initialization algorithm. This can be either "random" to choose random points as
    * initial cluster centers, or "k-means||" to use a parallel variant of k-means++
    * (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setInitializationMode(initializationMode: String): this.type = {
     KMeans.validateInitMode(initializationMode)
     this.initializationMode = initializationMode
@@ -104,8 +104,8 @@ class KMeans private (
   /**
    * :: Experimental ::
    * Number of runs of the algorithm to execute in parallel.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @Experimental
   def getRuns: Int = runs
 
@@ -114,8 +114,8 @@ class KMeans private (
    * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
    * this many times with random starting conditions (configured by the initialization mode), then
    * return the best clustering found over any run. Default: 1.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   @Experimental
   def setRuns(runs: Int): this.type = {
     if (runs <= 0) {
@@ -127,15 +127,15 @@ class KMeans private (
 
   /**
    * Number of steps for the k-means|| initialization mode
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getInitializationSteps: Int = initializationSteps
 
   /**
    * Set the number of steps for the k-means|| initialization mode. This is an advanced
    * setting -- the default of 5 is almost always enough. Default: 5.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setInitializationSteps(initializationSteps: Int): this.type = {
     if (initializationSteps <= 0) {
       throw new IllegalArgumentException("Number of initialization steps must be positive")
@@ -146,15 +146,15 @@ class KMeans private (
 
   /**
    * The distance threshold within which we've consider centers to have converged.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getEpsilon: Double = epsilon
 
   /**
    * Set the distance threshold within which we've consider centers to have converged.
    * If all centers move less than this Euclidean distance, we stop iterating one run.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setEpsilon(epsilon: Double): this.type = {
     this.epsilon = epsilon
     this
@@ -162,14 +162,14 @@ class KMeans private (
 
   /**
    * The random seed for cluster initialization.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getSeed: Long = seed
 
   /**
    * Set the random seed for cluster initialization.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -183,8 +183,8 @@ class KMeans private (
    * Set the initial starting point, bypassing the random initialization or k-means||
    * The condition model.k == this.k must be met, failure results
    * in an IllegalArgumentException.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setInitialModel(model: KMeansModel): this.type = {
     require(model.k == k, "mismatched cluster count")
     initialModel = Some(model)
@@ -194,8 +194,8 @@ class KMeans private (
   /**
    * Train a K-means model on the given set of points; `data` should be cached for high
    * performance, because this is an iterative algorithm.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def run(data: RDD[Vector]): KMeansModel = {
 
     if (data.getStorageLevel == StorageLevel.NONE) {
@@ -453,14 +453,14 @@ class KMeans private (
 
 /**
  * Top-level methods for calling K-means clustering.
- * @since 0.8.0
  */
+@Since("0.8.0")
 object KMeans {
 
   // Initialization mode names
-  /** @since 0.8.0 */
+  @Since("0.8.0")
   val RANDOM = "random"
-  /** @since 0.8.0 */
+  @Since("0.8.0")
   val K_MEANS_PARALLEL = "k-means||"
 
   /**
@@ -472,8 +472,8 @@ object KMeans {
    * @param runs number of parallel runs, defaults to 1. The best model is returned.
    * @param initializationMode initialization model, either "random" or "k-means||" (default).
    * @param seed random seed value for cluster initialization
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -497,8 +497,8 @@ object KMeans {
    * @param maxIterations max number of iterations
    * @param runs number of parallel runs, defaults to 1. The best model is returned.
    * @param initializationMode initialization model, either "random" or "k-means||" (default).
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -514,8 +514,8 @@ object KMeans {
 
   /**
    * Trains a k-means model using specified parameters and the default values for unspecified.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -525,8 +525,8 @@ object KMeans {
 
   /**
    * Trains a k-means model using specified parameters and the default values for unspecified.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index 8de2087..e425ecd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -23,6 +23,7 @@ import org.json4s._
 import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.pmml.PMMLExportable
@@ -34,35 +35,35 @@ import org.apache.spark.sql.Row
 
 /**
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
- * @since 0.8.0
  */
+@Since("0.8.0")
 class KMeansModel (
     val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {
 
   /**
    * A Java-friendly constructor that takes an Iterable of Vectors.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def this(centers: java.lang.Iterable[Vector]) = this(centers.asScala.toArray)
 
   /**
    * Total number of clusters.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def k: Int = clusterCenters.length
 
   /**
    * Returns the cluster index that a given point belongs to.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(point: Vector): Int = {
     KMeans.findClosest(clusterCentersWithNorm, new VectorWithNorm(point))._1
   }
 
   /**
    * Maps given points to their cluster indices.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def predict(points: RDD[Vector]): RDD[Int] = {
     val centersWithNorm = clusterCentersWithNorm
     val bcCentersWithNorm = points.context.broadcast(centersWithNorm)
@@ -71,16 +72,16 @@ class KMeansModel (
 
   /**
    * Maps given points to their cluster indices.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
     predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
 
   /**
    * Return the K-means cost (sum of squared distances of points to their nearest center) for this
    * model on the given data.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def computeCost(data: RDD[Vector]): Double = {
     val centersWithNorm = clusterCentersWithNorm
     val bcCentersWithNorm = data.context.broadcast(centersWithNorm)
@@ -90,9 +91,7 @@ class KMeansModel (
   private def clusterCentersWithNorm: Iterable[VectorWithNorm] =
     clusterCenters.map(new VectorWithNorm(_))
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     KMeansModel.SaveLoadV1_0.save(sc, this, path)
   }
@@ -100,14 +99,10 @@ class KMeansModel (
   override protected def formatVersion: String = "1.0"
 }
 
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 object KMeansModel extends Loader[KMeansModel] {
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String): KMeansModel = {
     KMeansModel.SaveLoadV1_0.load(sc, path)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 2a8c6ac..92a321a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import breeze.linalg.{DenseVector => BDV}
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
 import org.apache.spark.api.java.JavaPairRDD
 import org.apache.spark.graphx._
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
@@ -43,8 +43,8 @@ import org.apache.spark.util.Utils
  *
  * @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
  *       (Wikipedia)]]
- * @since 1.3.0
  */
+@Since("1.3.0")
 @Experimental
 class LDA private (
     private var k: Int,
@@ -57,8 +57,8 @@ class LDA private (
 
   /**
    * Constructs a LDA instance with default parameters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(k = 10, maxIterations = 20, docConcentration = Vectors.dense(-1),
     topicConcentration = -1, seed = Utils.random.nextLong(), checkpointInterval = 10,
     ldaOptimizer = new EMLDAOptimizer)
@@ -66,15 +66,15 @@ class LDA private (
   /**
    * Number of topics to infer.  I.e., the number of soft cluster centers.
    *
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getK: Int = k
 
   /**
    * Number of topics to infer.  I.e., the number of soft cluster centers.
    * (default = 10)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     require(k > 0, s"LDA k (number of clusters) must be > 0, but was set to $k")
     this.k = k
@@ -86,8 +86,8 @@ class LDA private (
    * distributions over topics ("theta").
    *
    * This is the parameter to a Dirichlet distribution.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getAsymmetricDocConcentration: Vector = this.docConcentration
 
   /**
@@ -96,8 +96,8 @@ class LDA private (
    *
    * This method assumes the Dirichlet distribution is symmetric and can be described by a single
    * [[Double]] parameter. It should fail if docConcentration is asymmetric.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getDocConcentration: Double = {
     val parameter = docConcentration(0)
     if (docConcentration.size == 1) {
@@ -131,8 +131,8 @@ class LDA private (
    *     - Values should be >= 0
    *     - default = uniformly (1.0 / k), following the implementation from
    *       [[https://github.com/Blei-Lab/onlineldavb]].
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setDocConcentration(docConcentration: Vector): this.type = {
     require(docConcentration.size > 0, "docConcentration must have > 0 elements")
     this.docConcentration = docConcentration
@@ -141,8 +141,8 @@ class LDA private (
 
   /**
    * Replicates a [[Double]] docConcentration to create a symmetric prior.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setDocConcentration(docConcentration: Double): this.type = {
     this.docConcentration = Vectors.dense(docConcentration)
     this
@@ -150,26 +150,26 @@ class LDA private (
 
   /**
    * Alias for [[getAsymmetricDocConcentration]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getAsymmetricAlpha: Vector = getAsymmetricDocConcentration
 
   /**
    * Alias for [[getDocConcentration]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getAlpha: Double = getDocConcentration
 
   /**
    * Alias for [[setDocConcentration()]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setAlpha(alpha: Vector): this.type = setDocConcentration(alpha)
 
   /**
    * Alias for [[setDocConcentration()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setAlpha(alpha: Double): this.type = setDocConcentration(alpha)
 
   /**
@@ -180,8 +180,8 @@ class LDA private (
    *
    * Note: The topics' distributions over terms are called "beta" in the original LDA paper
    * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getTopicConcentration: Double = this.topicConcentration
 
   /**
@@ -205,8 +205,8 @@ class LDA private (
    *     - Value should be >= 0
    *     - default = (1.0 / k), following the implementation from
    *       [[https://github.com/Blei-Lab/onlineldavb]].
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setTopicConcentration(topicConcentration: Double): this.type = {
     this.topicConcentration = topicConcentration
     this
@@ -214,27 +214,27 @@ class LDA private (
 
   /**
    * Alias for [[getTopicConcentration]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getBeta: Double = getTopicConcentration
 
   /**
    * Alias for [[setTopicConcentration()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setBeta(beta: Double): this.type = setTopicConcentration(beta)
 
   /**
    * Maximum number of iterations for learning.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getMaxIterations: Int = maxIterations
 
   /**
    * Maximum number of iterations for learning.
    * (default = 20)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -242,14 +242,14 @@ class LDA private (
 
   /**
    * Random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getSeed: Long = seed
 
   /**
    * Random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -257,8 +257,8 @@ class LDA private (
 
   /**
    * Period (in iterations) between checkpoints.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getCheckpointInterval: Int = checkpointInterval
 
   /**
@@ -268,8 +268,8 @@ class LDA private (
    * [[org.apache.spark.SparkContext]], this setting is ignored.
    *
    * @see [[org.apache.spark.SparkContext#setCheckpointDir]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setCheckpointInterval(checkpointInterval: Int): this.type = {
     this.checkpointInterval = checkpointInterval
     this
@@ -280,8 +280,8 @@ class LDA private (
    * :: DeveloperApi ::
    *
    * LDAOptimizer used to perform the actual calculation
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @DeveloperApi
   def getOptimizer: LDAOptimizer = ldaOptimizer
 
@@ -289,8 +289,8 @@ class LDA private (
    * :: DeveloperApi ::
    *
    * LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer)
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @DeveloperApi
   def setOptimizer(optimizer: LDAOptimizer): this.type = {
     this.ldaOptimizer = optimizer
@@ -300,8 +300,8 @@ class LDA private (
   /**
    * Set the LDAOptimizer used to perform the actual calculation by algorithm name.
    * Currently "em", "online" are supported.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setOptimizer(optimizerName: String): this.type = {
     this.ldaOptimizer =
       optimizerName.toLowerCase match {
@@ -321,8 +321,8 @@ class LDA private (
    *                   (where the vocabulary size is the length of the vector).
    *                   Document IDs must be unique and >= 0.
    * @return  Inferred LDA model
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(documents: RDD[(Long, Vector)]): LDAModel = {
     val state = ldaOptimizer.initialize(documents, this)
     var iter = 0
@@ -339,8 +339,8 @@ class LDA private (
 
   /**
    * Java-friendly version of [[run()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(documents: JavaPairRDD[java.lang.Long, Vector]): LDAModel = {
     run(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 6bc68a4..667374a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -192,24 +192,16 @@ class LocalLDAModel private[clustering] (
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel with Serializable {
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def k: Int = topics.numCols
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def vocabSize: Int = topics.numRows
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def topicsMatrix: Matrix = topics
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
     val brzTopics = topics.toBreeze.toDenseMatrix
     Range(0, k).map { topicIndex =>
@@ -222,9 +214,7 @@ class LocalLDAModel private[clustering] (
 
   override protected def formatVersion = "1.0"
 
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def save(sc: SparkContext, path: String): Unit = {
     LocalLDAModel.SaveLoadV1_0.save(sc, path, topicsMatrix, docConcentration, topicConcentration,
       gammaShape)
@@ -238,16 +228,16 @@ class LocalLDAModel private[clustering] (
    *
    * @param documents test corpus to use for calculating log likelihood
    * @return variational lower bound on the log likelihood of the entire corpus
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logLikelihood(documents: RDD[(Long, Vector)]): Double = logLikelihoodBound(documents,
     docConcentration, topicConcentration, topicsMatrix.toBreeze.toDenseMatrix, gammaShape, k,
     vocabSize)
 
   /**
    * Java-friendly version of [[logLikelihood]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logLikelihood(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
     logLikelihood(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }
@@ -258,8 +248,8 @@ class LocalLDAModel private[clustering] (
    *
    * @param documents test corpus to use for calculating perplexity
    * @return Variational upper bound on log perplexity per token.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logPerplexity(documents: RDD[(Long, Vector)]): Double = {
     val corpusTokenCount = documents
       .map { case (_, termCounts) => termCounts.toArray.sum }
@@ -267,9 +257,8 @@ class LocalLDAModel private[clustering] (
     -logLikelihood(documents) / corpusTokenCount
   }
 
-  /** Java-friendly version of [[logPerplexity]]
-   *  @since 1.5.0
-   */
+  /** Java-friendly version of [[logPerplexity]] */
+  @Since("1.5.0")
   def logPerplexity(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
     logPerplexity(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }
@@ -347,8 +336,8 @@ class LocalLDAModel private[clustering] (
    * for each document.
    * @param documents documents to predict topic mixture distributions for
    * @return An RDD of (document ID, topic mixture distribution for document)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   // TODO: declare in LDAModel and override once implemented in DistributedLDAModel
   def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
     // Double transpose because dirichletExpectation normalizes by row and we need to normalize
@@ -376,8 +365,8 @@ class LocalLDAModel private[clustering] (
 
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.4.1
    */
+  @Since("1.4.1")
   def topicDistributions(
       documents: JavaPairRDD[java.lang.Long, Vector]): JavaPairRDD[java.lang.Long, Vector] = {
     val distributions = topicDistributions(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
@@ -451,9 +440,7 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
     }
   }
 
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def load(sc: SparkContext, path: String): LocalLDAModel = {
     val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats
@@ -510,8 +497,8 @@ class DistributedLDAModel private[clustering] (
    * Convert model to a local model.
    * The local model stores the inferred topics but not the topic distributions for training
    * documents.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def toLocal: LocalLDAModel = new LocalLDAModel(topicsMatrix, docConcentration, topicConcentration,
     gammaShape)
 
@@ -521,8 +508,8 @@ class DistributedLDAModel private[clustering] (
    * No guarantees are given about the ordering of the topics.
    *
    * WARNING: This matrix is collected from an RDD. Beware memory usage when vocabSize, k are large.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   override lazy val topicsMatrix: Matrix = {
     // Collect row-major topics
     val termTopicCounts: Array[(Int, TopicCounts)] =
@@ -541,9 +528,7 @@ class DistributedLDAModel private[clustering] (
     Matrices.fromBreeze(brzTopics)
   }
 
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
     val numTopics = k
     // Note: N_k is not needed to find the top terms, but it is needed to normalize weights
@@ -582,8 +567,8 @@ class DistributedLDAModel private[clustering] (
    * @return  Array over topics.  Each element represent as a pair of matching arrays:
    *          (IDs for the documents, weights of the topic in these documents).
    *          For each topic, documents are sorted in order of decreasing topic weights.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def topDocumentsPerTopic(maxDocumentsPerTopic: Int): Array[(Array[Long], Array[Double])] = {
     val numTopics = k
     val topicsInQueues: Array[BoundedPriorityQueue[(Double, Long)]] =
@@ -666,8 +651,8 @@ class DistributedLDAModel private[clustering] (
    *  - This excludes the prior; for that, use [[logPrior]].
    *  - Even with [[logPrior]], this is NOT the same as the data log likelihood given the
    *    hyperparameters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   lazy val logLikelihood: Double = {
     // TODO: generalize this for asymmetric (non-scalar) alpha
     val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -693,8 +678,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * Log probability of the current parameter estimate:
    * log P(topics, topic distributions for docs | alpha, eta)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   lazy val logPrior: Double = {
     // TODO: generalize this for asymmetric (non-scalar) alpha
     val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -725,8 +710,8 @@ class DistributedLDAModel private[clustering] (
    * ("theta_doc").
    *
    * @return  RDD of (document ID, topic distribution) pairs
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def topicDistributions: RDD[(Long, Vector)] = {
     graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
       (docID.toLong, Vectors.fromBreeze(normalize(topicCounts, 1.0)))
@@ -735,8 +720,8 @@ class DistributedLDAModel private[clustering] (
 
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.4.1
    */
+  @Since("1.4.1")
   def javaTopicDistributions: JavaPairRDD[java.lang.Long, Vector] = {
     JavaPairRDD.fromRDD(topicDistributions.asInstanceOf[RDD[(java.lang.Long, Vector)]])
   }
@@ -744,8 +729,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * For each document, return the top k weighted topics for that document and their weights.
    * @return RDD of (doc ID, topic indices, topic weights)
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def topTopicsPerDocument(k: Int): RDD[(Long, Array[Int], Array[Double])] = {
     graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
       val topIndices = argtopk(topicCounts, k)
@@ -761,8 +746,8 @@ class DistributedLDAModel private[clustering] (
 
   /**
    * Java-friendly version of [[topTopicsPerDocument]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def javaTopTopicsPerDocument(k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
     val topics = topTopicsPerDocument(k)
     topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
@@ -775,8 +760,8 @@ class DistributedLDAModel private[clustering] (
 
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   override def save(sc: SparkContext, path: String): Unit = {
     DistributedLDAModel.SaveLoadV1_0.save(
       sc, path, graph, globalTopicTotals, k, vocabSize, docConcentration, topicConcentration,
@@ -877,9 +862,7 @@ object DistributedLDAModel extends Loader[DistributedLDAModel] {
 
   }
 
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def load(sc: SparkContext, path: String): DistributedLDAModel = {
     val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index cb517f9..5c2aae6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, all, normalize, su
 import breeze.numerics.{trigamma, abs, exp}
 import breeze.stats.distributions.{Gamma, RandBasis}
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.graphx._
 import org.apache.spark.graphx.impl.GraphImpl
 import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer
@@ -35,8 +35,8 @@ import org.apache.spark.rdd.RDD
  *
  * An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can
  * hold optimizer-specific parameters for users to set.
- * @since 1.4.0
  */
+@Since("1.4.0")
 @DeveloperApi
 sealed trait LDAOptimizer {
 
@@ -74,8 +74,8 @@ sealed trait LDAOptimizer {
  *  - Paper which clearly explains several algorithms, including EM:
  *    Asuncion, Welling, Smyth, and Teh.
  *    "On Smoothing and Inference for Topic Models."  UAI, 2009.
- * @since 1.4.0
  */
+@Since("1.4.0")
 @DeveloperApi
 final class EMLDAOptimizer extends LDAOptimizer {
 
@@ -226,8 +226,8 @@ final class EMLDAOptimizer extends LDAOptimizer {
  *
  * Original Online LDA paper:
  *   Hoffman, Blei and Bach, "Online Learning for Latent Dirichlet Allocation." NIPS, 2010.
- * @since 1.4.0
  */
+@Since("1.4.0")
 @DeveloperApi
 final class OnlineLDAOptimizer extends LDAOptimizer {
 
@@ -276,16 +276,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * A (positive) learning parameter that downweights early iterations. Larger values make early
    * iterations count less.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getTau0: Double = this.tau0
 
   /**
    * A (positive) learning parameter that downweights early iterations. Larger values make early
    * iterations count less.
    * Default: 1024, following the original Online LDA paper.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setTau0(tau0: Double): this.type = {
     require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
     this.tau0 = tau0
@@ -294,16 +294,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
 
   /**
    * Learning rate: exponential decay rate
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getKappa: Double = this.kappa
 
   /**
    * Learning rate: exponential decay rate---should be between
    * (0.5, 1.0] to guarantee asymptotic convergence.
    * Default: 0.51, based on the original Online LDA paper.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setKappa(kappa: Double): this.type = {
     require(kappa >= 0, s"Online LDA kappa must be nonnegative, but was set to $kappa")
     this.kappa = kappa
@@ -312,8 +312,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
 
   /**
    * Mini-batch fraction, which sets the fraction of document sampled and used in each iteration
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getMiniBatchFraction: Double = this.miniBatchFraction
 
   /**
@@ -325,8 +325,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
    * maxIterations * miniBatchFraction >= 1.
    *
    * Default: 0.05, i.e., 5% of total documents.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     require(miniBatchFraction > 0.0 && miniBatchFraction <= 1.0,
       s"Online LDA miniBatchFraction must be in range (0,1], but was set to $miniBatchFraction")
@@ -337,16 +337,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * Optimize alpha, indicates whether alpha (Dirichlet parameter for document-topic distribution)
    * will be optimized during training.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getOptimzeAlpha: Boolean = this.optimizeAlpha
 
   /**
    * Sets whether to optimize alpha parameter during training.
    *
    * Default: false
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setOptimzeAlpha(optimizeAlpha: Boolean): this.type = {
     this.optimizeAlpha = optimizeAlpha
     this

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index b4733ca..396b36f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -21,7 +21,7 @@ import org.json4s.JsonDSL._
 import org.json4s._
 import org.json4s.jackson.JsonMethods._
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.graphx._
 import org.apache.spark.graphx.impl.GraphImpl
@@ -39,16 +39,14 @@ import org.apache.spark.{Logging, SparkContext, SparkException}
  *
  * @param k number of clusters
  * @param assignments an RDD of clustering [[PowerIterationClustering#Assignment]]s
- * @since 1.3.0
  */
+@Since("1.3.0")
 @Experimental
 class PowerIterationClusteringModel(
     val k: Int,
     val assignments: RDD[PowerIterationClustering.Assignment]) extends Saveable with Serializable {
 
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     PowerIterationClusteringModel.SaveLoadV1_0.save(sc, this, path)
   }
@@ -56,9 +54,7 @@ class PowerIterationClusteringModel(
   override protected def formatVersion: String = "1.0"
 }
 
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
   override def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
     PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
@@ -73,8 +69,8 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
     val thisClassName = "org.apache.spark.mllib.clustering.PowerIterationClusteringModel"
 
     /**
-     * @since 1.4.0
      */
+    @Since("1.4.0")
     def save(sc: SparkContext, model: PowerIterationClusteringModel, path: String): Unit = {
       val sqlContext = new SQLContext(sc)
       import sqlContext.implicits._
@@ -87,9 +83,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
       dataRDD.write.parquet(Loader.dataPath(path))
     }
 
-    /**
-     * @since 1.4.0
-     */
+    @Since("1.4.0")
     def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
       implicit val formats = DefaultFormats
       val sqlContext = new SQLContext(sc)
@@ -136,14 +130,14 @@ class PowerIterationClustering private[clustering] (
   /**
    * Constructs a PIC instance with default parameters: {k: 2, maxIterations: 100,
    * initMode: "random"}.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(k = 2, maxIterations = 100, initMode = "random")
 
   /**
    * Set the number of clusters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -151,8 +145,8 @@ class PowerIterationClustering private[clustering] (
 
   /**
    * Set maximum number of iterations of the power iteration loop
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -161,8 +155,8 @@ class PowerIterationClustering private[clustering] (
   /**
    * Set the initialization mode. This can be either "random" to use a random vector
    * as vertex properties, or "degree" to use normalized sum similarities. Default: random.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setInitializationMode(mode: String): this.type = {
     this.initMode = mode match {
       case "random" | "degree" => mode
@@ -182,8 +176,8 @@ class PowerIterationClustering private[clustering] (
    *              assume s,,ij,, = 0.0.
    *
    * @return a [[PowerIterationClusteringModel]] that contains the clustering result
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def run(graph: Graph[Double, Double]): PowerIterationClusteringModel = {
     val w = normalize(graph)
     val w0 = initMode match {
@@ -204,8 +198,8 @@ class PowerIterationClustering private[clustering] (
    *                     assume s,,ij,, = 0.0.
    *
    * @return a [[PowerIterationClusteringModel]] that contains the clustering result
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(similarities: RDD[(Long, Long, Double)]): PowerIterationClusteringModel = {
     val w = normalize(similarities)
     val w0 = initMode match {
@@ -217,8 +211,8 @@ class PowerIterationClustering private[clustering] (
 
   /**
    * A Java-friendly version of [[PowerIterationClustering.run]].
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(similarities: JavaRDD[(java.lang.Long, java.lang.Long, java.lang.Double)])
     : PowerIterationClusteringModel = {
     run(similarities.rdd.asInstanceOf[RDD[(Long, Long, Double)]])
@@ -242,9 +236,7 @@ class PowerIterationClustering private[clustering] (
   }
 }
 
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
 @Experimental
 object PowerIterationClustering extends Logging {
 
@@ -253,8 +245,8 @@ object PowerIterationClustering extends Logging {
    * Cluster assignment.
    * @param id node id
    * @param cluster assigned cluster id
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   case class Assignment(id: Long, cluster: Int)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index a915804..41f2668 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import scala.reflect.ClassTag
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaSparkContext._
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.rdd.RDD
@@ -63,9 +63,8 @@ import org.apache.spark.util.random.XORShiftRandom
  * such that at time t + h the discount applied to the data from t is 0.5.
  * The definition remains the same whether the time unit is given
  * as batches or points.
- * @since 1.2.0
- *
  */
+@Since("1.2.0")
 @Experimental
 class StreamingKMeansModel(
     override val clusterCenters: Array[Vector],
@@ -73,8 +72,8 @@ class StreamingKMeansModel(
 
   /**
    * Perform a k-means update on a batch of data.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def update(data: RDD[Vector], decayFactor: Double, timeUnit: String): StreamingKMeansModel = {
 
     // find nearest cluster to each point
@@ -166,23 +165,23 @@ class StreamingKMeansModel(
  *    .setRandomCenters(5, 100.0)
  *    .trainOn(DStream)
  * }}}
- * @since 1.2.0
  */
+@Since("1.2.0")
 @Experimental
 class StreamingKMeans(
     var k: Int,
     var decayFactor: Double,
     var timeUnit: String) extends Logging with Serializable {
 
-  /** @since 1.2.0 */
+  @Since("1.2.0")
   def this() = this(2, 1.0, StreamingKMeans.BATCHES)
 
   protected var model: StreamingKMeansModel = new StreamingKMeansModel(null, null)
 
   /**
    * Set the number of clusters.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -190,8 +189,8 @@ class StreamingKMeans(
 
   /**
    * Set the decay factor directly (for forgetful algorithms).
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setDecayFactor(a: Double): this.type = {
     this.decayFactor = a
     this
@@ -199,8 +198,8 @@ class StreamingKMeans(
 
   /**
    * Set the half life and time unit ("batches" or "points") for forgetful algorithms.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setHalfLife(halfLife: Double, timeUnit: String): this.type = {
     if (timeUnit != StreamingKMeans.BATCHES && timeUnit != StreamingKMeans.POINTS) {
       throw new IllegalArgumentException("Invalid time unit for decay: " + timeUnit)
@@ -213,8 +212,8 @@ class StreamingKMeans(
 
   /**
    * Specify initial centers directly.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setInitialCenters(centers: Array[Vector], weights: Array[Double]): this.type = {
     model = new StreamingKMeansModel(centers, weights)
     this
@@ -226,8 +225,8 @@ class StreamingKMeans(
    * @param dim Number of dimensions
    * @param weight Weight for each center
    * @param seed Random seed
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setRandomCenters(dim: Int, weight: Double, seed: Long = Utils.random.nextLong): this.type = {
     val random = new XORShiftRandom(seed)
     val centers = Array.fill(k)(Vectors.dense(Array.fill(dim)(random.nextGaussian())))
@@ -238,8 +237,8 @@ class StreamingKMeans(
 
   /**
    * Return the latest model.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def latestModel(): StreamingKMeansModel = {
     model
   }
@@ -251,8 +250,8 @@ class StreamingKMeans(
    * and updates the model using each batch of data from the stream.
    *
    * @param data DStream containing vector data
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def trainOn(data: DStream[Vector]) {
     assertInitialized()
     data.foreachRDD { (rdd, time) =>
@@ -262,8 +261,8 @@ class StreamingKMeans(
 
   /**
    * Java-friendly version of `trainOn`.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def trainOn(data: JavaDStream[Vector]): Unit = trainOn(data.dstream)
 
   /**
@@ -271,8 +270,8 @@ class StreamingKMeans(
    *
    * @param data DStream containing vector data
    * @return DStream containing predictions
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def predictOn(data: DStream[Vector]): DStream[Int] = {
     assertInitialized()
     data.map(model.predict)
@@ -280,8 +279,8 @@ class StreamingKMeans(
 
   /**
    * Java-friendly version of `predictOn`.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Integer] = {
     JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Integer]])
   }
@@ -292,8 +291,8 @@ class StreamingKMeans(
    * @param data DStream containing (key, feature vector) pairs
    * @tparam K key type
    * @return DStream containing the input keys and the predictions as values
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Int)] = {
     assertInitialized()
     data.mapValues(model.predict)
@@ -301,8 +300,8 @@ class StreamingKMeans(
 
   /**
    * Java-friendly version of `predictOnValues`.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predictOnValues[K](
       data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Integer] = {
     implicit val tag = fakeClassTag[K]

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index 486741e..76ae847 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.evaluation
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.Logging
 import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.evaluation.binary._
@@ -41,8 +41,8 @@ import org.apache.spark.sql.DataFrame
  *                of bins may not exactly equal numBins. The last bin in each partition may
  *                be smaller as a result, meaning there may be an extra sample at
  *                partition boundaries.
- * @since 1.3.0
  */
+@Since("1.3.0")
 @Experimental
 class BinaryClassificationMetrics(
     val scoreAndLabels: RDD[(Double, Double)],
@@ -52,8 +52,8 @@ class BinaryClassificationMetrics(
 
   /**
    * Defaults `numBins` to 0.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def this(scoreAndLabels: RDD[(Double, Double)]) = this(scoreAndLabels, 0)
 
   /**
@@ -65,16 +65,16 @@ class BinaryClassificationMetrics(
 
   /**
    * Unpersist intermediate RDDs used in the computation.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def unpersist() {
     cumulativeCounts.unpersist()
   }
 
   /**
    * Returns thresholds in descending order.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def thresholds(): RDD[Double] = cumulativeCounts.map(_._1)
 
   /**
@@ -82,8 +82,8 @@ class BinaryClassificationMetrics(
    * which is an RDD of (false positive rate, true positive rate)
    * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
    * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def roc(): RDD[(Double, Double)] = {
     val rocCurve = createCurve(FalsePositiveRate, Recall)
     val sc = confusions.context
@@ -94,16 +94,16 @@ class BinaryClassificationMetrics(
 
   /**
    * Computes the area under the receiver operating characteristic (ROC) curve.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def areaUnderROC(): Double = AreaUnderCurve.of(roc())
 
   /**
    * Returns the precision-recall curve, which is an RDD of (recall, precision),
    * NOT (precision, recall), with (0.0, 1.0) prepended to it.
    * @see http://en.wikipedia.org/wiki/Precision_and_recall
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def pr(): RDD[(Double, Double)] = {
     val prCurve = createCurve(Recall, Precision)
     val sc = confusions.context
@@ -113,8 +113,8 @@ class BinaryClassificationMetrics(
 
   /**
    * Computes the area under the precision-recall curve.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def areaUnderPR(): Double = AreaUnderCurve.of(pr())
 
   /**
@@ -122,26 +122,26 @@ class BinaryClassificationMetrics(
    * @param beta the beta factor in F-Measure computation.
    * @return an RDD of (threshold, F-Measure) pairs.
    * @see http://en.wikipedia.org/wiki/F1_score
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta))
 
   /**
    * Returns the (threshold, F-Measure) curve with beta = 1.0.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def fMeasureByThreshold(): RDD[(Double, Double)] = fMeasureByThreshold(1.0)
 
   /**
    * Returns the (threshold, precision) curve.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def precisionByThreshold(): RDD[(Double, Double)] = createCurve(Precision)
 
   /**
    * Returns the (threshold, recall) curve.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall)
 
   private lazy val (

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index dddfa3e..02e89d9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.evaluation
 import scala.collection.Map
 
 import org.apache.spark.SparkContext._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.DataFrame
@@ -30,8 +30,8 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for multiclass classification.
  *
  * @param predictionAndLabels an RDD of (prediction, label) pairs.
- * @since 1.1.0
  */
+@Since("1.1.0")
 @Experimental
 class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
 
@@ -65,8 +65,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * predicted classes are in columns,
    * they are ordered by class label ascending,
    * as in "labels"
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def confusionMatrix: Matrix = {
     val n = labels.size
     val values = Array.ofDim[Double](n * n)
@@ -85,15 +85,15 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns true positive rate for a given label (category)
    * @param label the label.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def truePositiveRate(label: Double): Double = recall(label)
 
   /**
    * Returns false positive rate for a given label (category)
    * @param label the label.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def falsePositiveRate(label: Double): Double = {
     val fp = fpByClass.getOrElse(label, 0)
     fp.toDouble / (labelCount - labelCountByClass(label))
@@ -102,8 +102,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns precision for a given label (category)
    * @param label the label.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def precision(label: Double): Double = {
     val tp = tpByClass(label)
     val fp = fpByClass.getOrElse(label, 0)
@@ -113,16 +113,16 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns recall for a given label (category)
    * @param label the label.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label)
 
   /**
    * Returns f-measure for a given label (category)
    * @param label the label.
    * @param beta the beta parameter.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def fMeasure(label: Double, beta: Double): Double = {
     val p = precision(label)
     val r = recall(label)
@@ -133,8 +133,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns f1-measure for a given label (category)
    * @param label the label.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def fMeasure(label: Double): Double = fMeasure(label, 1.0)
 
   /**
@@ -187,8 +187,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns weighted averaged f-measure
    * @param beta the beta parameter.
-   * @since 1.1.0
    */
+  @Since("1.1.0")
   def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) =>
     fMeasure(category, beta) * count.toDouble / labelCount
   }.sum

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
index 77cb1e0..a0a8d9c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.mllib.evaluation
 
+import org.apache.spark.annotation.Since
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 import org.apache.spark.sql.DataFrame
@@ -25,8 +26,8 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for multilabel classification.
  * @param predictionAndLabels an RDD of (predictions, labels) pairs,
  * both are non-null Arrays, each with unique elements.
- * @since 1.2.0
  */
+@Since("1.2.0")
 class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
 
   /**
@@ -104,8 +105,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns precision for a given label (category)
    * @param label the label.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def precision(label: Double): Double = {
     val tp = tpPerClass(label)
     val fp = fpPerClass.getOrElse(label, 0L)
@@ -115,8 +116,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns recall for a given label (category)
    * @param label the label.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def recall(label: Double): Double = {
     val tp = tpPerClass(label)
     val fn = fnPerClass.getOrElse(label, 0L)
@@ -126,8 +127,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns f1-measure for a given label (category)
    * @param label the label.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def f1Measure(label: Double): Double = {
     val p = precision(label)
     val r = recall(label)

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 063fbed..a7f43f0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -23,7 +23,7 @@ import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.{JavaSparkContext, JavaRDD}
 import org.apache.spark.rdd.RDD
 
@@ -34,8 +34,8 @@ import org.apache.spark.rdd.RDD
  * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
  *
  * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs.
- * @since 1.2.0
  */
+@Since("1.2.0")
 @Experimental
 class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])])
   extends Logging with Serializable {
@@ -56,8 +56,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
    *
    * @param k the position to compute the truncated precision, must be positive
    * @return the average precision at the first k ranking positions
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def precisionAt(k: Int): Double = {
     require(k > 0, "ranking position k should be positive")
     predictionAndLabels.map { case (pred, lab) =>
@@ -126,8 +126,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
    *
    * @param k the position to compute the truncated ndcg, must be positive
    * @return the average ndcg at the first k ranking positions
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def ndcgAt(k: Int): Double = {
     require(k > 0, "ranking position k should be positive")
     predictionAndLabels.map { case (pred, lab) =>
@@ -165,8 +165,8 @@ object RankingMetrics {
   /**
    * Creates a [[RankingMetrics]] instance (for Java users).
    * @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = {
     implicit val tag = JavaSparkContext.fakeClassTag[E]
     val rdd = predictionAndLabels.rdd.map { case (predictions, labels) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/f5b028ed/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 54dfd8c..36a6c35 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.evaluation
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.Logging
 import org.apache.spark.mllib.linalg.Vectors
@@ -29,8 +29,8 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for regression.
  *
  * @param predictionAndObservations an RDD of (prediction, observation) pairs.
- * @since 1.2.0
  */
+@Since("1.2.0")
 @Experimental
 class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
 
@@ -67,8 +67,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
    * Returns the variance explained by regression.
    * explainedVariance = \sum_i (\hat{y_i} - \bar{y})^2 / n
    * @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def explainedVariance: Double = {
     SSreg / summary.count
   }
@@ -76,8 +76,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the mean absolute error, which is a risk function corresponding to the
    * expected value of the absolute error loss or l1-norm loss.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def meanAbsoluteError: Double = {
     summary.normL1(1) / summary.count
   }
@@ -85,8 +85,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the mean squared error, which is a risk function corresponding to the
    * expected value of the squared error loss or quadratic loss.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def meanSquaredError: Double = {
     SSerr / summary.count
   }
@@ -94,8 +94,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the root mean squared error, which is defined as the square root of
    * the mean squared error.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def rootMeanSquaredError: Double = {
     math.sqrt(this.meanSquaredError)
   }
@@ -103,8 +103,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns R^2^, the unadjusted coefficient of determination.
    * @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def r2: Double = {
     1 - SSerr / SStot
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message