Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@mahout.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: vanstee@apache.org
To: commits@mahout.apache.org
Date: Tue, 27 Jun 2017 16:14:42 -0000
Message-Id: <017e6bb648084c4f9457d0d34992e468@git.apache.org>
In-Reply-To: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org>
References: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org>
Subject: [17/52] [partial] mahout git commit: removed all files except for
 website directory
archived-at: Tue, 27 Jun 2017 16:14:39 -0000

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala
deleted file mode 100644
index 36fc551..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.naivebayes
-
-import org.apache.mahout.classifier.stats.{ResultAnalyzer, ClassifierResult}
-import org.apache.mahout.math._
-import scalabindings._
-import scalabindings.RLikeOps._
-import drm.RLikeDrmOps._
-import drm._
-import scala.reflect.ClassTag
-import scala.language.asInstanceOf
-import collection._
-import scala.collection.JavaConversions._
-
-/**
- * Distributed training of a Naive Bayes model. Follows the approach presented in Rennie et.al.: Tackling the poor
- * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf
- */
-trait NaiveBayes extends java.io.Serializable{
-
-  /** default value for the Laplacian smoothing parameter */
-  def defaultAlphaI = 1.0f
-
-  // function to extract categories from string keys
-  type CategoryParser = String => String
-
-  /** Default: seqdirectory/seq2Sparse Categories are Stored in Drm Keys as: /Category/document_id */
-  def seq2SparseCategoryParser: CategoryParser = x => x.split("/")(1)
-
-
-  /**
-   * Distributed training of a Naive Bayes model. Follows the approach presented in Rennie et.al.: Tackling the poor
-   * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf
-   *
-   * @param observationsPerLabel a DrmLike[Int] matrix containing term frequency counts for each label.
-   * @param trainComplementary whether or not to train a complementary Naive Bayes model
-   * @param alphaI Laplace smoothing parameter
-   * @return trained naive bayes model
-   */
-  def train(observationsPerLabel: DrmLike[Int],
-            labelIndex: Map[String, Integer],
-            trainComplementary: Boolean = true,
-            alphaI: Float = defaultAlphaI): NBModel = {
-
-    // Summation of all weights per feature
-    val weightsPerFeature = observationsPerLabel.colSums
-
-    // Distributed summation of all weights per label
-    val weightsPerLabel = observationsPerLabel.rowSums
-
-    // Collect a matrix to pass to the NaiveBayesModel
-    val inCoreTFIDF = observationsPerLabel.collect
-
-    // perLabelThetaNormalizer Vector is expected by NaiveBayesModel. We can pass a null value
-    // or Vector of zeroes in the case of a standard NB model.
-    var thetaNormalizer = weightsPerFeature.like()
-
-    // Instantiate a trainer and retrieve the perLabelThetaNormalizer Vector from it in the case of
-    // a complementary NB model
-    if (trainComplementary) {
-      val thetaTrainer = new ComplementaryNBThetaTrainer(weightsPerFeature,
-                                                         weightsPerLabel,
-                                                         alphaI)
-      // local training of the theta normalization
-      for (labelIndex <- 0 until inCoreTFIDF.nrow) {
-        thetaTrainer.train(labelIndex, inCoreTFIDF(labelIndex, ::))
-      }
-      thetaNormalizer = thetaTrainer.retrievePerLabelThetaNormalizer
-    }
-
-    new NBModel(inCoreTFIDF,
-                weightsPerFeature,
-                weightsPerLabel,
-                thetaNormalizer,
-                labelIndex,
-                alphaI,
-                trainComplementary)
-  }
-
-  /**
-   * Extract label Keys from raw TF or TF-IDF Matrix generated by seqdirectory/seq2sparse
-   * and aggregate TF or TF-IDF values by their label
-   * Override this method in engine specific modules to optimize
-   *
-   * @param stringKeyedObservations DrmLike matrix; Output from seq2sparse
-   *   in form K = eg./Category/document_title
-   *           V = TF or TF-IDF values per term
-   * @param cParser a String => String function used to extract categories from
-   *   Keys of the stringKeyedObservations DRM. The default
-   *   CategoryParser will extract "Category" from: '/Category/document_id'
-   * @return  (labelIndexMap,aggregatedByLabelObservationDrm)
-   *   labelIndexMap is a HashMap [String, Integer] K = label
-   *                                                V = label row index
-   *   aggregatedByLabelObservationDrm is a DrmLike[Int] of aggregated
-   *   TF or TF-IDF counts per label
-   */
-  def extractLabelsAndAggregateObservations[K](stringKeyedObservations: DrmLike[K],
-                                                         cParser: CategoryParser = seq2SparseCategoryParser)
-                                                        (implicit ctx: DistributedContext):
-                                                        (mutable.HashMap[String, Integer], DrmLike[Int])= {
-
-    stringKeyedObservations.checkpoint()
-
-    val numDocs=stringKeyedObservations.nrow
-    val numFeatures=stringKeyedObservations.ncol
-
-    // For mapblocks that return K.
-    implicit val ktag = stringKeyedObservations.keyClassTag
-
-    // Extract categories from labels assigned by seq2sparse
-    // Categories are Stored in Drm Keys as eg.: /Category/document_id
-
-    // Get a new DRM with a single column so that we don't have to collect the
-    // DRM into memory upfront.
-    val strippedObeservations = stringKeyedObservations.mapBlock(ncol = 1) {
-      case (keys, block) =>
-        val blockB = block.like(keys.size, 1)
-        keys -> blockB
-    }
-
-    // Extract the row label bindings (the String keys) from the slim Drm
-    // strip the document_id from the row keys keeping only the category.
-    // Sort the bindings alphabetically into a Vector
-    val labelVectorByRowIndex = strippedObeservations
-                                  .getRowLabelBindings
-                                  .map(x => x._2 -> cParser(x._1))
-                                  .toVector.sortWith(_._1 < _._1)
-
-    //TODO: add a .toIntKeyed(...) method to DrmLike?
-
-    // Copy stringKeyedObservations to an Int-Keyed Drm so that we can compute transpose
-    // Collect the matrices up front for now, until we have a distributed way of converting
-    val inCoreStringKeyedObservations = stringKeyedObservations.collect
-    val inCoreIntKeyedObservations = new SparseMatrix(
-                             stringKeyedObservations.nrow.toInt,
-                             stringKeyedObservations.ncol)
-    for (i <- 0 until inCoreStringKeyedObservations.nrow) {
-      inCoreIntKeyedObservations(i, ::) = inCoreStringKeyedObservations(i, ::)
-    }
-
-    val intKeyedObservations= drmParallelize(inCoreIntKeyedObservations)
-
-    stringKeyedObservations.uncache()
-
-    var labelIndex = 0
-    val labelIndexMap = new mutable.HashMap[String, Integer]
-    val encodedLabelByRowIndexVector = new DenseVector(labelVectorByRowIndex.size)
-    
-    // Encode Categories as an Integer (Double) so we can broadcast as a vector
-    // where each element is an Int-encoded category whose index corresponds
-    // to its row in the Drm
-    for (i <- labelVectorByRowIndex.indices) {
-      if (!labelIndexMap.contains(labelVectorByRowIndex(i)._2)) {
-        encodedLabelByRowIndexVector(i) = labelIndex.toDouble
-        labelIndexMap.put(labelVectorByRowIndex(i)._2, labelIndex)
-        labelIndex += 1
-      }
-      // don't like this casting but need to use a java.lang.Integer when setting rowLabelBindings
-      encodedLabelByRowIndexVector(i) = labelIndexMap
-                                          .getOrElse(labelVectorByRowIndex(i)._2, -1)
-                                          .asInstanceOf[Int].toDouble
-    }
-
-    // "Combiner": Map and aggregate by Category. Do this by broadcasting the encoded
-    // category vector and mapping a transposed IntKeyed Drm out so that all categories
-    // will be present on all nodes as columns and can be referenced by
-    // BCastEncodedCategoryByRowVector.  Iteratively sum all categories.
-    val nLabels = labelIndex
-
-    val bcastEncodedCategoryByRowVector = drmBroadcast(encodedLabelByRowIndexVector)
-
-    val aggregetedObservationByLabelDrm = intKeyedObservations.t.mapBlock(ncol = nLabels) {
-      case (keys, blockA) =>
-        val blockB = blockA.like(keys.size, nLabels)
-        var label : Int = 0
-        for (i <- 0 until keys.size) {
-          blockA(i, ::).nonZeroes().foreach { elem =>
-            label = bcastEncodedCategoryByRowVector.get(elem.index).toInt
-            blockB(i, label) = blockB(i, label) + blockA(i, elem.index)
-          }
-        }
-        keys -> blockB
-    }.t
-
-    (labelIndexMap, aggregetedObservationByLabelDrm)
-  }
-
-  /**
-   * Test a trained model with a labeled dataset sequentially
-   * @param model a trained NBModel
-   * @param testSet a labeled testing set
-   * @param testComplementary test using a complementary or a standard NB classifier
-   * @param cParser a String => String function used to extract categories from
-   *   Keys of the testing set DRM. The default
-   *   CategoryParser will extract "Category" from: '/Category/document_id'
-   *
-   *   *Note*: this method brings the entire test set into memory upfront.
-   *           This method is optimized and parallelized in SparkNaiveBayes
-   *
-   * @tparam K implicitly determined Key type of test set DRM: String
-   * @return a result analyzer with confusion matrix and accuracy statistics
-   */
-  def test[K: ClassTag](model: NBModel,
-                        testSet: DrmLike[K],
-                        testComplementary: Boolean = false,
-                        cParser: CategoryParser = seq2SparseCategoryParser)
-                       (implicit ctx: DistributedContext): ResultAnalyzer = {
-
-    val labelMap = model.labelIndex
-
-    val numLabels = model.numLabels
-
-    testSet.checkpoint()
-
-    val numTestInstances = testSet.nrow.toInt
-
-    // instantiate the correct type of classifier
-    val classifier = testComplementary match {
-      case true => new ComplementaryNBClassifier(model) with Serializable
-      case _ => new StandardNBClassifier(model) with Serializable
-    }
-    
-    if (testComplementary) {
-      assert(testComplementary == model.isComplementary,
-        "Complementary Label Assignment requires Complementary Training")
-    }
-
-
-    // Sequentially assign labels to the test set:
-    // *Note* this brings the entire test set into memory upfront:
-
-    // Since we can't broadcast the model as is, do it sequentially up front for now
-    val inCoreTestSet = testSet.collect
-
-    // get the labels of the test set and extract the keys
-    val testSetLabelMap = testSet.getRowLabelBindings
-
-    // empty Matrix in which we'll set the classification scores
-    val inCoreScoredTestSet = testSet.like(numTestInstances, numLabels)
-
-    testSet.uncache()
-    
-    for (i <- 0 until numTestInstances) {
-      inCoreScoredTestSet(i, ::) := classifier.classifyFull(inCoreTestSet(i, ::))
-    }
-
-    // todo: reverse the labelMaps in training and through the model?
-
-    // reverse the label map and extract the labels
-    val reverseTestSetLabelMap = testSetLabelMap.map(x => x._2 -> cParser(x._1))
-
-    val reverseLabelMap = labelMap.map(x => x._2 -> x._1)
-
-    val analyzer = new ResultAnalyzer(labelMap.keys.toList.sorted, "DEFAULT")
-
-    // assign labels- winner takes all
-    for (i <- 0 until numTestInstances) {
-      val (bestIdx, bestScore) = argmax(inCoreScoredTestSet(i, ::))
-      val classifierResult = new ClassifierResult(reverseLabelMap(bestIdx), bestScore)
-      analyzer.addInstance(reverseTestSetLabelMap(i), classifierResult)
-    }
-
-    analyzer
-  }
-
-  /**
-   * argmax with values as well
-   * returns a tuple of index of the max score and the score itself.
-   * @param v Vector of scores
-   * @return  (bestIndex, bestScore)
-   */
-  def argmax(v: Vector): (Int, Double) = {
-    var bestIdx: Int = Integer.MIN_VALUE
-    var bestScore: Double = Integer.MIN_VALUE.toDouble
-    for(i <- 0 until v.size) {
-      if(v(i) > bestScore){
-        bestScore = v(i)
-        bestIdx = i
-      }
-    }
-    (bestIdx, bestScore)
-  }
-
-}
-
-object NaiveBayes extends NaiveBayes with java.io.Serializable
-
-/**
- * Trainer for the weight normalization vector used by Transform Weight Normalized Complement
- * Naive Bayes.  See: Rennie et.al.: Tackling the poor assumptions of Naive Bayes Text classifiers,
- * ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf Sec. 3.2.
- *
- * @param weightsPerFeature a Vector of summed TF or TF-IDF weights for each word in dictionary.
- * @param weightsPerLabel a Vector of summed TF or TF-IDF weights for each label.
- * @param alphaI Laplace smoothing factor. Default value of 1.
- */
-class ComplementaryNBThetaTrainer(private val weightsPerFeature: Vector,
-                                  private val weightsPerLabel: Vector,
-                                  private val alphaI: Double = 1.0) {
-                                   
-   private val perLabelThetaNormalizer: Vector = weightsPerLabel.like()
-   private val totalWeightSum: Double = weightsPerLabel.zSum
-   private val numFeatures: Double = weightsPerFeature.getNumNondefaultElements
-
-   assert(weightsPerFeature != null, "weightsPerFeature vector can not be null")
-   assert(weightsPerLabel != null, "weightsPerLabel vector can not be null")
-
-  /**
-   * Train the weight normalization vector for each label
-   * @param label
-   * @param featurePerLabelWeight
-   */
-  def train(label: Int, featurePerLabelWeight: Vector) {
-    val currentLabelWeight = labelWeight(label)
-    // sum weights for each label including those with zero word counts
-    for (i <- 0 until featurePerLabelWeight.size) {
-      val currentFeaturePerLabelWeight = featurePerLabelWeight(i)
-      updatePerLabelThetaNormalizer(label,
-        ComplementaryNBClassifier.computeWeight(featureWeight(i),
-                                                currentFeaturePerLabelWeight,
-                                                totalWeightSum,
-                                                currentLabelWeight,
-                                                alphaI,
-                                                numFeatures)
-                                   )
-    }
-  }
-
-  /**
-   * getter for summed TF or TF-IDF weights by label
-   * @param label index of label
-   * @return sum of word TF or TF-IDF weights for label
-   */
-  def labelWeight(label: Int): Double = {
-    weightsPerLabel(label)
-  }
-
-  /**
-   * getter for summed TF or TF-IDF weights by word.
-   * @param feature index of word.
-   * @return sum of TF or TF-IDF weights for word.
-   */
-  def featureWeight(feature: Int): Double = {
-    weightsPerFeature(feature)
-  }
-
-  /**
-   * add the magnitude of the current weight to the current
-   * label's corresponding Vector element.
-   * @param label index of label to update.
-   * @param weight weight to add.
-   */
-  def updatePerLabelThetaNormalizer(label: Int, weight: Double) {
-    perLabelThetaNormalizer(label) = perLabelThetaNormalizer(label) + Math.abs(weight)
-  }
-
-  /**
-   * Getter for the weight normalizer vector as indexed by label
-   * @return a copy of the weight normalizer vector.
-   */
-  def retrievePerLabelThetaNormalizer: Vector = {
-    perLabelThetaNormalizer.cloned
-  }
-
-
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala
deleted file mode 100644
index 8f1413a..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-package org.apache.mahout.classifier.stats
-
-import java.text.{DecimalFormat, NumberFormat}
-import java.util
-import org.apache.mahout.math.stats.OnlineSummarizer
-
-
-/**
- * Result of a document classification. The label and the associated score (usually probability)
- */
-class ClassifierResult (private var label: String = null,
-                        private var score: Double = 0.0,
-                        private var logLikelihood: Double = Integer.MAX_VALUE.toDouble) {
-
-  def getLogLikelihood: Double = logLikelihood
-
-  def setLogLikelihood(llh: Double) {
-    logLikelihood = llh
-  }
-
-  def getLabel: String = label
-
-  def getScore: Double = score
-
-  def setLabel(lbl: String) {
-    label = lbl
-  }
-
-  def setScore(sc: Double) {
-    score = sc
-  }
-
-  override def toString: String = {
-     "ClassifierResult{" + "category='" + label + '\'' + ", score=" + score + '}'
-  }
-
-}
-
-/**
- * ResultAnalyzer captures the classification statistics and displays in a tabular manner
- * @param labelSet Set of labels to be considered in classification
- * @param defaultLabel  the default label for an unknown classification
- */
-class ResultAnalyzer(private val labelSet: util.Collection[String], defaultLabel: String) {
-
-  val confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel)
-  val summarizer = new OnlineSummarizer
-
-  private var hasLL: Boolean = false
-  private var correctlyClassified: Int = 0
-  private var incorrectlyClassified: Int = 0
-
-
-  def getConfusionMatrix: ConfusionMatrix = confusionMatrix
-
-  /**
-   *
-   * @param correctLabel
-   * The correct label
-   * @param classifiedResult
-   * The classified result
-   * @return whether the instance was correct or not
-   */
-  def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Boolean = {
-    val result: Boolean = correctLabel == classifiedResult.getLabel
-    if (result) {
-      correctlyClassified += 1
-    }
-    else {
-      incorrectlyClassified += 1
-    }
-    confusionMatrix.addInstance(correctLabel, classifiedResult)
-    if (classifiedResult.getLogLikelihood != Integer.MAX_VALUE.toDouble) {
-      summarizer.add(classifiedResult.getLogLikelihood)
-      hasLL = true
-    }
-
-    result
-  }
-
-  /** Dump the resulting statistics to a string */
-  override def toString: String = {
-    val returnString: StringBuilder = new StringBuilder
-    returnString.append('\n')
-    returnString.append("=======================================================\n")
-    returnString.append("Summary\n")
-    returnString.append("-------------------------------------------------------\n")
-    val totalClassified: Int = correctlyClassified + incorrectlyClassified
-    val percentageCorrect: Double = 100.asInstanceOf[Double] * correctlyClassified / totalClassified
-    val percentageIncorrect: Double = 100.asInstanceOf[Double] * incorrectlyClassified / totalClassified
-    val decimalFormatter: NumberFormat = new DecimalFormat("0.####")
-    returnString.append("Correctly Classified Instances")
-                .append(": ")
-                .append(Integer.toString(correctlyClassified))
-                .append('\t')
-                .append(decimalFormatter.format(percentageCorrect))
-                .append("%\n")
-    returnString.append("Incorrectly Classified Instances")
-                .append(": ")
-                .append(Integer.toString(incorrectlyClassified))
-                .append('\t')
-                .append(decimalFormatter.format(percentageIncorrect))
-                .append("%\n")
-    returnString.append("Total Classified Instances")
-                .append(": ")
-                .append(Integer.toString(totalClassified))
-                .append('\n')
-    returnString.append('\n')
-    returnString.append(confusionMatrix)
-    returnString.append("=======================================================\n")
-    returnString.append("Statistics\n")
-    returnString.append("-------------------------------------------------------\n")
-    val normStats: RunningAverageAndStdDev = confusionMatrix.getNormalizedStats
-    returnString.append("Kappa: \t")
-                .append(decimalFormatter.format(confusionMatrix.getKappa))
-                .append('\n')
-    returnString.append("Accuracy: \t")
-                .append(decimalFormatter.format(confusionMatrix.getAccuracy))
-                .append("%\n")
-    returnString.append("Reliability: \t")
-                .append(decimalFormatter.format(normStats.getAverage * 100.00000001))
-                .append("%\n")
-    returnString.append("Reliability (std dev): \t")
-                .append(decimalFormatter.format(normStats.getStandardDeviation))
-                .append('\n')
-    returnString.append("Weighted precision: \t")
-                .append(decimalFormatter.format(confusionMatrix.getWeightedPrecision))
-                .append('\n')
-    returnString.append("Weighted recall: \t")
-                .append(decimalFormatter.format(confusionMatrix.getWeightedRecall))
-                .append('\n')
-    returnString.append("Weighted F1 score: \t")
-                .append(decimalFormatter.format(confusionMatrix.getWeightedF1score))
-                .append('\n')
-    if (hasLL) {
-      returnString.append("Log-likelihood: \t")
-                  .append("mean      :  \t")
-                  .append(decimalFormatter.format(summarizer.getMean))
-                  .append('\n')
-      returnString.append("25%-ile   :  \t")
-                  .append(decimalFormatter.format(summarizer.getQuartile(1)))
-                  .append('\n')
-      returnString.append("75%-ile   :  \t")
-                  .append(decimalFormatter.format(summarizer.getQuartile(3)))
-                  .append('\n')
-    }
-
-    returnString.toString()
-  }
-
-
-}
-
-/**
- *
- * Interface for classes that can keep track of a running average of a series of numbers. One can add to or
- * remove from the series, as well as update a datum in the series. The class does not actually keep track of
- * the series of values, just its running average, so it doesn't even matter if you remove/change a value that
- * wasn't added.
- *
- * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverage.java
- */
-trait RunningAverage {
-
-  /**
-   * @param datum
-   * new item to add to the running average
-   * @throws IllegalArgumentException
-   * if datum is { @link Double#NaN}
-   */
-  def addDatum(datum: Double)
-
-  /**
-   * @param datum
-   * item to remove from the running average
-   * @throws IllegalArgumentException
-   * if datum is { @link Double#NaN}
-   * @throws IllegalStateException
-   * if count is 0
-   */
-  def removeDatum(datum: Double)
-
-  /**
-   * @param delta
-   * amount by which to change a datum in the running average
-   * @throws IllegalArgumentException
-   * if delta is { @link Double#NaN}
-   * @throws IllegalStateException
-   * if count is 0
-   */
-  def changeDatum(delta: Double)
-
-  def getCount: Int
-
-  def getAverage: Double
-
-  /**
-   * @return a (possibly immutable) object whose average is the negative of this object's
-   */
-  def inverse: RunningAverage
-}
-
-/**
- *
- * Extends {@link RunningAverage} by adding standard deviation too.
- *
- * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev.java
- */
-trait RunningAverageAndStdDev extends RunningAverage {
-
-  /** @return standard deviation of data */
-  def getStandardDeviation: Double
-
-  /**
-   * @return a (possibly immutable) object whose average is the negative of this object's
-   */
-  def inverse: RunningAverageAndStdDev
-}
-
-
-class InvertedRunningAverage(private val delegate: RunningAverage) extends RunningAverage {
-
-  override def addDatum(datum: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  override def removeDatum(datum: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  override def changeDatum(delta: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  override def getCount: Int = {
-     delegate.getCount
-  }
-
-  override def getAverage: Double = {
-     -delegate.getAverage
-  }
-
-  override def inverse: RunningAverage = {
-     delegate
-  }
-}
-
-
-/**
- *
- * A simple class that can keep track of a running average of a series of numbers. One can add to or remove
- * from the series, as well as update a datum in the series. The class does not actually keep track of the
- * series of values, just its running average, so it doesn't even matter if you remove/change a value that
- * wasn't added.
- *
- * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverage.java
- */
-class FullRunningAverage(private var count: Int = 0,
-                         private var average: Double = Double.NaN ) extends RunningAverage {
-
-  /**
-   * @param datum
-   * new item to add to the running average
-   */
-  override def addDatum(datum: Double) {
-    count += 1
-    if (count == 1) {
-      average = datum
-    }
-    else {
-      average = average * (count - 1) / count + datum / count
-    }
-  }
-
-  /**
-   * @param datum
-   * item to remove from the running average
-   * @throws IllegalStateException
-   * if count is 0
-   */
-  override def removeDatum(datum: Double) {
-    if (count == 0) {
-      throw new IllegalStateException
-    }
-    count -= 1
-    if (count == 0) {
-      average = Double.NaN
-    }
-    else {
-      average = average * (count + 1) / count - datum / count
-    }
-  }
-
-  /**
-   * @param delta
-   * amount by which to change a datum in the running average
-   * @throws IllegalStateException
-   * if count is 0
-   */
-  override def changeDatum(delta: Double) {
-    if (count == 0) {
-      throw new IllegalStateException
-    }
-    average += delta / count
-  }
-
-  override def getCount: Int = {
-    count
-  }
-
-  override def getAverage: Double = {
-    average
-  }
-
-  override def inverse: RunningAverage = {
-    new InvertedRunningAverage(this)
-  }
-
-  override def toString: String = {
-    String.valueOf(average)
-  }
-}
-
-
-/**
- *
- * Extends {@link FullRunningAverage} to add a running standard deviation computation.
- * Uses Welford's method, as described at http://www.johndcook.com/standard_deviation.html
- *
- * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev.java
- */
-class FullRunningAverageAndStdDev(private var count: Int = 0,
-                                  private var average: Double = 0.0,
-                                  private var mk: Double = 0.0,
-                                  private var sk: Double = 0.0) extends FullRunningAverage with RunningAverageAndStdDev {
-
-  var stdDev: Double = 0.0
-
-  recomputeStdDev
-
-  def getMk: Double = {
-     mk
-  }
-
-  def getSk: Double = {
-    sk
-  }
-
-  override def getStandardDeviation: Double = {
-    stdDev
-  }
-
-  override def addDatum(datum: Double) {
-    super.addDatum(datum)
-    val count: Int = getCount
-    if (count == 1) {
-      mk = datum
-      sk = 0.0
-    }
-    else {
-      val oldmk: Double = mk
-      val diff: Double = datum - oldmk
-      mk += diff / count
-      sk += diff * (datum - mk)
-    }
-    recomputeStdDev
-  }
-
-  override def removeDatum(datum: Double) {
-    val oldCount: Int = getCount
-    super.removeDatum(datum)
-    val oldmk: Double = mk
-    mk = (oldCount * oldmk - datum) / (oldCount - 1)
-    sk -= (datum - mk) * (datum - oldmk)
-    recomputeStdDev
-  }
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-  override def changeDatum(delta: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  private def recomputeStdDev {
-    val count: Int = getCount
-    stdDev = if (count > 1) Math.sqrt(sk / (count - 1)) else Double.NaN
-  }
-
-  override def inverse: RunningAverageAndStdDev = {
-     new InvertedRunningAverageAndStdDev(this)
-  }
-
-  override def toString: String = {
-     String.valueOf(String.valueOf(getAverage) + ',' + stdDev)
-  }
-
-}
-
-
-/**
- *
- * @param delegate RunningAverageAndStdDev instance
- *
- * Ported from org.apache.mahout.cf.taste.impl.common.InvertedRunningAverageAndStdDev.java
- */
-class InvertedRunningAverageAndStdDev(private val delegate: RunningAverageAndStdDev) extends RunningAverageAndStdDev {
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-  override def addDatum(datum: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-
-  override def removeDatum(datum: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-  override def changeDatum(delta: Double) {
-    throw new UnsupportedOperationException
-  }
-
-  override def getCount: Int = {
-     delegate.getCount
-  }
-
-  override def getAverage: Double = {
-     -delegate.getAverage
-  }
-
-  override def getStandardDeviation: Double = {
-     delegate.getStandardDeviation
-  }
-
-  override def inverse: RunningAverageAndStdDev = {
-     delegate
-  }
-}
-
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala
deleted file mode 100644
index d421fa1..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-package org.apache.mahout.classifier.stats
-
-import java.util
-import org.apache.commons.math3.stat.descriptive.moment.Mean // This is brought in by mahout-math
-import org.apache.mahout.math.{DenseMatrix, Matrix}
-import scala.collection.mutable
-import scala.collection.JavaConversions._
-
-/**
- *
- * Ported from org.apache.mahout.classifier.ConfusionMatrix.java
- *
- * The ConfusionMatrix Class stores the result of Classification of a Test Dataset as a square table of counts
- * indexed as `confusionMatrix(correctLabelId)(classifiedLabelId)`.
- *
- * The fact of whether there is a default is not stored. A row of zeros is the only indicator that there is no default.
- *
- * See http://en.wikipedia.org/wiki/Confusion_matrix for background
- *
- *
- * @param labels The labels to consider for classification; `null` (the default) is treated as an empty
- *               collection, so only the default label is registered
- * @param defaultLabel default unknown label
- */
-class ConfusionMatrix(private var labels: util.Collection[String] = null,
-                      private var defaultLabel: String = "unknown")  {
-
-  // The parameter defaults to null; without this guard `new ConfusionMatrix()` fails with a
-  // NullPointerException on `labels.size` below.
-  if (labels == null) {
-    labels = new util.ArrayList[String]()
-  }
-
-  /**
-   * Matrix Constructor
-   */
-//   def this(m: Matrix) {
-//     this()
-//     confusionMatrix = Array.ofDim[Int](m.numRows, m.numRows)
-//     setMatrix(m)
-//   }
-
-   // val LOG: Logger = LoggerFactory.getLogger(classOf[ConfusionMatrix])
-
-  /** Counts, one row/column per label plus one extra for the default label. */
-  var confusionMatrix = Array.ofDim[Int](labels.size + 1, labels.size + 1)
-
-  /** Maps every label (including the default label) to its row/column index. */
-  val labelMap = new mutable.HashMap[String,Integer]()
-
-  /** Running sample counter, updated by `addInstance` and `putCount`. */
-  var samples: Int = 0
-
-  // Labels receive consecutive ids in iteration order; the default label takes the next id.
-  var i: Integer = 0
-  for (label <- labels) {
-    labelMap.put(label, i)
-    i+=1
-  }
-  labelMap.put(defaultLabel, i)
-
-
-  def getConfusionMatrix: Array[Array[Int]] = confusionMatrix
-
-  /** All known labels, default label included, in the iteration order of the label map. */
-  def getLabels: List[String] = labelMap.keys.toList
-
-  def numLabels: Int = labelMap.size
-
-  /**
-   * Percentage (0-100) of the instances whose correct label is `label` that were classified as `label`.
-   * The result is NaN when no instance with that correct label has been counted.
-   */
-  def getAccuracy(label: String): Double = {
-    val labelId: Int = labelMap(label)
-    var labelTotal: Int = 0
-    var correct: Int = 0
-    for (col <- 0 until numLabels) {
-      labelTotal += confusionMatrix(labelId)(col)
-      if (col == labelId) {
-        correct += confusionMatrix(labelId)(col)
-      }
-    }
-
-    100.0 * correct / labelTotal
-  }
-
-  /** Percentage (0-100) of all counted instances that lie on the diagonal; NaN when nothing was counted. */
-  def getAccuracy: Double = {
-    var total: Int = 0
-    var correct: Int = 0
-    for (row <- 0 until numLabels; col <- 0 until numLabels) {
-      total += confusionMatrix(row)(col)
-      if (row == col) {
-        correct += confusionMatrix(row)(col)
-      }
-    }
-
-    100.0 * correct / total
-  }
-
-  /** Sum of true positives and false negatives */
-  private def getActualNumberOfTestExamplesForClass(label: String): Int = {
-    val labelId: Int = labelMap(label)
-    var sum: Int = 0
-    for (col <- 0 until numLabels) {
-      sum += confusionMatrix(labelId)(col)
-    }
-    sum
-  }
-
-  /**
-   * Mean of `metric` over all labels, weighted by the number of test examples whose correct label is that label.
-   * Shared by the three weighted scores so that they cannot drift apart.
-   */
-  private def weightedMean(metric: String => Double): Double = {
-    val values: Array[Double] = new Array[Double](numLabels)
-    val weights: Array[Double] = new Array[Double](numLabels)
-    var index: Int = 0
-    for (label <- labelMap.keys) {
-      values(index) = metric(label)
-      weights(index) = getActualNumberOfTestExamplesForClass(label)
-      index += 1
-    }
-    new Mean().evaluate(values, weights)
-  }
-
-  /** truePositives / (truePositives + falsePositives), or 0 when `label` was never predicted. */
-  def getPrecision(label: String): Double = {
-    val labelId: Int = labelMap(label)
-    val truePositives: Int = confusionMatrix(labelId)(labelId)
-    var falsePositives: Int = 0
-
-    for (row <- 0 until numLabels) {
-      if (row != labelId) {
-        falsePositives += confusionMatrix(row)(labelId)
-      }
-    }
-
-    if (truePositives + falsePositives == 0) {
-      0.0
-    } else {
-      truePositives.toDouble / (truePositives + falsePositives)
-    }
-  }
-
-
-  def getWeightedPrecision: Double = weightedMean(label => getPrecision(label))
-
-  /** truePositives / (truePositives + falseNegatives), or 0 when no instance of `label` was counted. */
-  def getRecall(label: String): Double = {
-    val labelId: Int = labelMap(label)
-    val truePositives: Int = confusionMatrix(labelId)(labelId)
-    var falseNegatives: Int = 0
-    for (col <- 0 until numLabels) {
-      if (col != labelId) {
-        falseNegatives += confusionMatrix(labelId)(col)
-      }
-    }
-
-    if (truePositives + falseNegatives == 0) {
-      0.0
-    } else {
-      truePositives.toDouble / (truePositives + falseNegatives)
-    }
-  }
-
-  def getWeightedRecall: Double = weightedMean(label => getRecall(label))
-
-  /** Harmonic mean of precision and recall, or 0 when both are 0. */
-  def getF1score(label: String): Double = {
-    val precision: Double = getPrecision(label)
-    val recall: Double = getRecall(label)
-    if (precision + recall == 0) {
-      0.0
-    } else {
-      2 * precision * recall / (precision + recall)
-    }
-  }
-
-  def getWeightedF1score: Double = weightedMean(label => getF1score(label))
-
-  /**
-   * Sum of the per-label accuracies of all non-default labels divided by the number of labels.
-   * NOTE(review): the divisor also counts the default label, exactly as in the Java original - confirm
-   * that this is intended before changing it.
-   */
-  def getReliability: Double = {
-    var count: Int = 0
-    var accuracy: Double = 0
-    for (label <- labelMap.keys) {
-      if (label != defaultLabel) {
-        accuracy += getAccuracy(label)
-      }
-      count += 1
-    }
-    accuracy / count
-  }
-
-  /**
-   * Accuracy v.s. randomly classifying all samples.
-   * kappa() = (totalAccuracy() - randomAccuracy()) / (1 - randomAccuracy())
-   * Cohen, Jacob. 1960. A coefficient of agreement for nominal scales.
-   * Educational And Psychological Measurement 20:37-46.
-   *
-   * Formula and variable names from:
-   * http://www.yale.edu/ceo/OEFS/Accuracy.pdf
-   *
-   * @return double
-   */
-  def getKappa: Double = {
-    var a: Double = 0.0
-    var b: Double = 0.0
-    for (i <- confusionMatrix.indices) {
-      a += confusionMatrix(i)(i)
-      // Row and column totals are accumulated as Long and multiplied as Double: the former
-      // Int product `br * bc` wrapped around silently for large test sets.
-      var br: Long = 0L
-      for (j <- confusionMatrix.indices) {
-        br += confusionMatrix(i)(j)
-      }
-      var bc: Long = 0L
-      for (row <- confusionMatrix) {
-        bc += row(i)
-      }
-      b += br.toDouble * bc
-    }
-    // Same reasoning: `samples * samples` overflows Int beyond 46340 samples.
-    val n: Double = samples.toDouble
-    (n * a - b) / (n * n - b)
-  }
-
-  /** Number of instances of `label` that were classified as `label`. */
-  def getCorrect(label: String): Int = {
-    val labelId: Int = labelMap(label)
-    confusionMatrix(labelId)(labelId)
-  }
-
-  /** Number of counted instances whose correct label is `label`. */
-  def getTotal(label: String): Int = {
-    val labelId: Int = labelMap(label)
-    var labelTotal: Int = 0
-    for (col <- 0 until numLabels) {
-      labelTotal += confusionMatrix(labelId)(col)
-    }
-    labelTotal
-  }
-
-  /**
-   * Standard deviation of normalized producer accuracy
-   * Not a standard score
-   * @return double
-   */
-  def getNormalizedStats: RunningAverageAndStdDev = {
-    val summer = new FullRunningAverageAndStdDev()
-    for (d <- confusionMatrix.indices) {
-      var total: Double = 0.0
-      for (j <- confusionMatrix.indices) {
-        total += confusionMatrix(d)(j)
-      }
-      // The small epsilon keeps the ratio finite for rows without any counts.
-      summer.addDatum(confusionMatrix(d)(d) / (total + 0.000001))
-    }
-    summer
-  }
-
-  def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Unit = {
-    samples += 1
-    incrementCount(correctLabel, classifiedResult.getLabel)
-  }
-
-  def addInstance(correctLabel: String, classifiedLabel: String): Unit = {
-    samples += 1
-    incrementCount(correctLabel, classifiedLabel)
-  }
-
-  /**
-   * Count stored for the (correct, classified) pair; 0 when `correctLabel` is unknown.
-   * An unknown `classifiedLabel` trips the assertion.
-   */
-  def getCount(correctLabel: String, classifiedLabel: String): Int = {
-    if (!labelMap.contains(correctLabel)) {
-    //  LOG.warn("Label {} did not appear in the training examples", correctLabel)
-      0
-    } else {
-      assert(labelMap.contains(classifiedLabel), "Label not found: " + classifiedLabel)
-      val correctId: Int = labelMap(correctLabel)
-      val classifiedId: Int = labelMap(classifiedLabel)
-      confusionMatrix(correctId)(classifiedId)
-    }
-  }
-
-  /**
-   * Overwrites the count of the (correct, classified) pair; silently ignored when `correctLabel` is unknown.
-   * `samples` is bumped by one whenever a cell changes from zero to a non-zero count.
-   */
-  def putCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = {
-    if (labelMap.contains(correctLabel)) {
-      assert(labelMap.contains(classifiedLabel), "Label not found: " + classifiedLabel)
-      val correctId: Int = labelMap(correctLabel)
-      val classifiedId: Int = labelMap(classifiedLabel)
-      if (confusionMatrix(correctId)(classifiedId) == 0 && count != 0) {
-        samples += 1
-      }
-      confusionMatrix(correctId)(classifiedId) = count
-    }
-    //  else: LOG.warn("Label {} did not appear in the training examples", correctLabel)
-  }
-
-  def incrementCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = {
-    putCount(correctLabel, classifiedLabel, count + getCount(correctLabel, classifiedLabel))
-  }
-
-  def incrementCount(correctLabel: String, classifiedLabel: String): Unit = {
-    incrementCount(correctLabel, classifiedLabel, 1)
-  }
-
-  def getDefaultLabel: String = {
-    defaultLabel
-  }
-
-  /** Adds every count of `b` to this matrix (labels are taken from this matrix) and returns this matrix. */
-  def merge(b: ConfusionMatrix): ConfusionMatrix = {
-    assert(labelMap.size == b.getLabels.size, "The label sizes do not match")
-    for (correctLabel <- this.labelMap.keys) {
-      for (classifiedLabel <- this.labelMap.keys) {
-        incrementCount(correctLabel, classifiedLabel, b.getCount(correctLabel, classifiedLabel))
-      }
-    }
-    this
-  }
-
-  /** Copies the counts into a new DenseMatrix whose row and column label bindings mirror the label map. */
-  def getMatrix: Matrix = {
-    val length: Int = confusionMatrix.length
-    val m: Matrix = new DenseMatrix(length, length)
-
-    val labels: java.util.HashMap[String, Integer] = new java.util.HashMap()
-
-    for (r <- 0 until length) {
-      for (c <- 0 until length) {
-        m.set(r, c, confusionMatrix(r)(c))
-      }
-    }
-
-    for ((label, id) <- labelMap) {
-      labels.put(label, id)
-    }
-    m.setRowLabelBindings(labels)
-    m.setColumnLabelBindings(labels)
-
-    m
-  }
-
-  /**
-   * Loads the counts (rounded to the nearest integer) from `m`; when `m` carries row or column label
-   * bindings, the label map is rebuilt from them.
-   *
-   * @throws IllegalArgumentException when `m` is not square
-   */
-  def setMatrix(m: Matrix) : Unit = {
-    val length: Int = confusionMatrix.length
-    if (m.numRows != m.numCols) {
-      throw new IllegalArgumentException("ConfusionMatrix: matrix(" + m.numRows + ',' + m.numCols + ") must be square")
-    }
-
-    for (r <- 0 until length) {
-      for (c <- 0 until length) {
-        confusionMatrix(r)(c) = Math.round(m.get(r, c)).toInt
-      }
-    }
-
-    // Row bindings win; column bindings are only the fallback.
-    var labels = m.getRowLabelBindings
-    if (labels == null) {
-      labels = m.getColumnLabelBindings
-    }
-
-    if (labels != null) {
-      val sorted: Array[String] = sortLabels(labels)
-      verifyLabels(length, sorted)
-      labelMap.clear()
-      for (i <- 0 until length) {
-        labelMap.put(sorted(i), i)
-      }
-    }
-  }
-
-  /** Asserts that `sorted` holds exactly `length` non-null labels. */
-  def verifyLabels(length: Int, sorted: Array[String]): Unit = {
-    assert(sorted.length == length, "One label, one row")
-    for (i <- 0 until length) {
-      if (sorted(i) == null) {
-        assert(assertion = false, "One label, one row")
-      }
-    }
-  }
-
-  /** Inverts a label-to-index map into an array that holds each label at its index. */
-  def sortLabels(labels: java.util.Map[String, Integer]): Array[String] = {
-    val sorted: Array[String] = new Array[String](labels.size)
-    for (entry <- labels.entrySet) {
-      sorted(entry.getValue) = entry.getKey
-    }
-
-    sorted
-  }
-
-  /**
-   * This is overloaded. toString() is not a formatted report you print for a manager :)
-   * Assume that if there are no default assignments, the default feature was not used
-   */
-  override def toString: String = {
-
-    val report: StringBuilder = new StringBuilder(200)
-
-    report.append("=======================================================").append('\n')
-    report.append("Confusion Matrix\n")
-    report.append("-------------------------------------------------------").append('\n')
-
-    val unclassified: Int = getTotal(defaultLabel)
-
-    // The default label is left out of the report entirely while nothing was assigned to it.
-    def hidden(label: String): Boolean = (label == defaultLabel) && unclassified == 0
-
-    for ((label, id) <- this.labelMap) {
-      if (!hidden(label)) {
-        report.append(getSmallLabel(id) + "     ").append('\t')
-      }
-    }
-
-    report.append("<--Classified as").append('\n')
-
-    for ((correctLabel, id) <- this.labelMap) {
-      if (!hidden(correctLabel)) {
-        var labelTotal: Int = 0
-
-        for (classifiedLabel <- this.labelMap.keySet) {
-          if (!hidden(classifiedLabel)) {
-            report.append(Integer.toString(getCount(correctLabel, classifiedLabel)) + "     ")
-                  .append('\t')
-            labelTotal += getCount(correctLabel, classifiedLabel)
-          }
-        }
-        report.append(" |  ").append(String.valueOf(labelTotal) + "      ")
-              .append('\t')
-              .append(getSmallLabel(id) + "     ")
-              .append(" = ")
-              .append(correctLabel)
-              .append('\n')
-      }
-    }
-
-    if (unclassified > 0) {
-      report.append("Default Category: ")
-            .append(defaultLabel)
-            .append(": ")
-            .append(unclassified)
-            .append('\n')
-    }
-    report.append('\n')
-
-    report.toString()
-  }
-
-
-  /** Short column header for a label index, written in base 26 with the digits 'a'..'z' (0 -> "a", 26 -> "ba"). */
-  def getSmallLabel(i: Int): String = {
-    var value: Int = i
-    val digits: StringBuilder = new StringBuilder
-    do {
-      val n: Int = value % 26
-      digits.insert(0, ('a' + n).toChar)
-      value /= 26
-    } while (value > 0)
-
-    digits.toString()
-  }
-
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala b/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala
deleted file mode 100644
index 534d37c..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.common.io
-
-import com.esotericsoftware.kryo.io.{Input, Output}
-import com.esotericsoftware.kryo.{Kryo, Serializer}
-import org.apache.log4j.Logger
-import org.apache.mahout.logging._
-import org.apache.mahout.math._
-import org.apache.mahout.math.flavor.TraversingStructureEnum
-import org.apache.mahout.math.scalabindings.RLikeOps._
-import org.apache.mahout.math.scalabindings._
-
-import scala.collection.JavaConversions._
-
-/** Companion object holding the log4j logger obtained for the GenericMatrixKryoSerializer class. */
-object GenericMatrixKryoSerializer {
-
-  // Declared implicit; presumably picked up by the org.apache.mahout.logging helpers - verify against that package.
-  implicit private final val log: Logger = Logger.getLogger(classOf[GenericMatrixKryoSerializer])
-
-}
-
-/**
- * Serializes Sparse or Dense in-core generic matrix (row-wise or column-wise backed)
- *
- * Stream layout written by `write` and expected by `read`: the ordinal of the matrix's traversing structure,
- * the row count, the column count, and then a payload whose shape depends on that structure.
- */
-class GenericMatrixKryoSerializer extends Serializer[Matrix] {
-
-  import GenericMatrixKryoSerializer._
-
-  /**
-   * Writes the header (structure ordinal, nrow, ncol) followed by the payload chosen from the structure.
-   * Column-wise structures are written through the transposed view `mx.t`.
-   */
-  override def write(kryo: Kryo, output: Output, mx: Matrix): Unit = {
-
-    debug(s"Writing mx of type ${mx.getClass.getName}")
-
-    val structure = mx.getFlavor.getStructure
-
-    // Write structure bit
-    output.writeInt(structure.ordinal(), true)
-
-    // Write geometry
-    output.writeInt(mx.nrow, true)
-    output.writeInt(mx.ncol, true)
-
-    // Write in most efficient traversal order (using backing vectors perhaps)
-    structure match {
-      case TraversingStructureEnum.COLWISE => writeRowWise(kryo, output, mx.t)
-      case TraversingStructureEnum.SPARSECOLWISE => writeSparseRowWise(kryo, output, mx.t)
-      case TraversingStructureEnum.SPARSEROWWISE => writeSparseRowWise(kryo, output, mx)
-      case TraversingStructureEnum.VECTORBACKED => writeVectorBacked(kryo, output, mx)
-      case _ => writeRowWise(kryo, output, mx)
-    }
-
-  }
-
-  /**
-   * Writes the concrete matrix class followed by its single backing vector.
-   * Only DiagonalMatrix, DenseSymmetricMatrix and UpperTriangular are handled.
-   *
-   * @throws IllegalArgumentException for any other matrix type (the class has already been written by then)
-   */
-  private def writeVectorBacked(kryo: Kryo, output: Output, mx: Matrix) {
-
-    require(mx != null)
-
-    // At this point we are just doing some vector-backed classes individually. TODO: create
-    // api to obtain vector-backed matrix data.
-    kryo.writeClass(output, mx.getClass)
-    mx match {
-      case mxD: DiagonalMatrix => kryo.writeObject(output, mxD.diagv)
-      case mxS: DenseSymmetricMatrix => kryo.writeObject(output, dvec(mxS.getData))
-      case mxT: UpperTriangular => kryo.writeObject(output, dvec(mxT.getData))
-      case _ => throw new IllegalArgumentException(s"Unsupported matrix type:${mx.getClass.getName}")
-    }
-  }
-
-  /**
-   * Reads the matrix class and one Vector, then instantiates the class reflectively through its
-   * single-Vector constructor. `nrow` and `ncol` are not used here.
-   */
-  private def readVectorBacked(kryo: Kryo, input: Input, nrow: Int, ncol: Int) = {
-
-    // We require vector-backed matrices to have vector-parameterized constructor to construct.
-    val clazz = kryo.readClass(input).getType
-
-    debug(s"Deserializing vector-backed mx of type ${clazz.getName}.")
-
-    clazz.getConstructor(classOf[Vector]).newInstance(kryo.readObject(input, classOf[Vector])).asInstanceOf[Matrix]
-  }
-
-  /** Writes each row of `mx` as one Vector object, without any count or index prefix. */
-  private def writeRowWise(kryo: Kryo, output: Output, mx: Matrix): Unit = {
-    for (row <- mx) kryo.writeObject(output, row)
-  }
-
-  /** Reads exactly `nrow` Vector objects; counterpart of `writeRowWise`. */
-  private def readRows(kryo: Kryo, input: Input, nrow: Int) = {
-    Array.tabulate(nrow) { _ => kryo.readObject(input, classOf[Vector])}
-  }
-
-  /** Reads a slice count followed by that many (index, Vector) pairs; counterpart of `writeSparseRowWise`. */
-  private def readSparseRows(kryo: Kryo, input: Input) = {
-
-    // Number of slices
-    val nslices = input.readInt(true)
-
-    Array.tabulate(nslices) { _ =>
-      input.readInt(true) -> kryo.readObject(input, classOf[Vector])
-    }
-  }
-
-  /**
-   * Writes `mx.numSlices()` followed by an (index, vector) pair for every slice returned by `iterateNonEmpty()`.
-   * The two counts are compared only after everything has been written, so a mismatch leaves the bad data in `output`.
-   */
-  private def writeSparseRowWise(kryo: Kryo, output: Output, mx: Matrix): Unit = {
-
-    val nslices = mx.numSlices()
-
-    output.writeInt(nslices, true)
-
-    var actualNSlices = 0
-    for (row <- mx.iterateNonEmpty()) {
-      output.writeInt(row.index(), true)
-      kryo.writeObject(output, row.vector())
-      actualNSlices += 1
-    }
-
-    require(nslices == actualNSlices, "Number of slices reported by Matrix.numSlices() was different from actual " +
-      "slice iterator size.")
-  }
-
-  /**
-   * Reads the header written by `write` and rebuilds the matrix according to the structure hint.
-   * `mxClass` is not consulted; the result type follows from the stream contents.
-   */
-  override def read(kryo: Kryo, input: Input, mxClass: Class[Matrix]): Matrix = {
-
-    // Read structure hint
-    val structure = TraversingStructureEnum.values()(input.readInt(true))
-
-    // Read geometry
-    val nrow = input.readInt(true)
-    val ncol = input.readInt(true)
-
-    debug(s"read matrix geometry: $nrow x $ncol.")
-
-    structure match {
-
-      // Sparse or dense column wise
-      case TraversingStructureEnum.COLWISE =>
-        // The writer emitted the rows of the transpose, so there are ncol vectors to read.
-        val cols = readRows(kryo, input, ncol)
-
-        if (!cols.isEmpty && cols.head.isDense)
-          dense(cols).t
-        else {
-          debug("Deserializing as SparseRowMatrix.t (COLWISE).")
-          new SparseRowMatrix(ncol, nrow, cols, true, false).t
-        }
-
-      // transposed SparseMatrix case
-      case TraversingStructureEnum.SPARSECOLWISE =>
-        val cols = readSparseRows(kryo, input)
-        val javamap = new java.util.HashMap[Integer, Vector]((cols.size << 1) + 1)
-        cols.foreach { case (idx, vec) => javamap.put(idx, vec)}
-
-        debug("Deserializing as SparseMatrix.t (SPARSECOLWISE).")
-        new SparseMatrix(ncol, nrow, javamap, true).t
-
-      // Sparse Row-wise -- this will be created as a SparseMatrix.
-      case TraversingStructureEnum.SPARSEROWWISE =>
-        val rows = readSparseRows(kryo, input)
-        val javamap = new java.util.HashMap[Integer, Vector]((rows.size << 1) + 1)
-        rows.foreach { case (idx, vec) => javamap.put(idx, vec)}
-
-        debug("Deserializing as SparseMatrix (SPARSEROWWISE).")
-        new SparseMatrix(nrow, ncol, javamap, true)
-      case TraversingStructureEnum.VECTORBACKED =>
-
-        debug("Deserializing vector-backed...")
-        readVectorBacked(kryo, input, nrow, ncol)
-
-      // By default, read row-wise.
-      case _ =>
-        // Despite its name, `cols` holds the nrow row vectors here.
-        val cols = readRows(kryo, input, nrow)
-        // this still copies a lot of stuff...
-        if (!cols.isEmpty && cols.head.isDense) {
-
-          debug("Deserializing as DenseMatrix.")
-          dense(cols)
-        } else {
-
-          debug("Deserializing as SparseRowMatrix(default).")
-          new SparseRowMatrix(nrow, ncol, cols, true, false)
-        }
-    }
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala b/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala
deleted file mode 100644
index 3cc537c..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.common.io
-
-import com.esotericsoftware.kryo.io.{Input, Output}
-import com.esotericsoftware.kryo.{Kryo, Serializer}
-import org.apache.mahout.logging._
-import org.apache.mahout.math._
-import org.apache.mahout.math.scalabindings.RLikeOps._
-
-import scala.collection.JavaConversions._
-
-
-/** Bit masks of the flag byte used by the VectorKryoSerializer class, plus its logger. */
-object VectorKryoSerializer {
-
-  // One bit per property; the serializer combines them with bitwise OR.
-  final val FLAG_DENSE: Int = 1 << 0
-  final val FLAG_SEQUENTIAL: Int = 1 << 1
-  final val FLAG_NAMED: Int = 1 << 2
-  final val FLAG_LAX_PRECISION: Int = 1 << 3
-
-  implicit private final val log = getLog(classOf[VectorKryoSerializer])
-
-}
-
-/**
- * Kryo serializer for Mahout vectors.
- *
- * Stream layout: the vector length, one flag byte (see the `FLAG_*` masks of the companion object), the name
- * when `FLAG_NAMED` is set, and then the payload:
- *  - dense: `length` consecutive values;
- *  - sparse, sequential access: (value, index delta) pairs closed by a single 0.0 value;
- *  - sparse, random access: (value, index) pairs closed by a single 0.0 value.
- *
- * Because 0.0 terminates the sparse encodings, no element that would be written as zero may reach the stream.
- *
- * @param laxPrecision when true, values are written as floats instead of doubles
- */
-class VectorKryoSerializer(val laxPrecision: Boolean = false) extends Serializer[Vector] {
-
-  import VectorKryoSerializer._
-
-  /**
-   * Writes `vector` in the layout described on the class.
-   *
-   * @throws IllegalArgumentException when `vector` is null
-   */
-  override def write(kryo: Kryo, output: Output, vector: Vector): Unit = {
-
-    require(vector != null)
-
-    trace(s"Serializing vector of ${vector.getClass.getName} class.")
-
-    // Write length
-    val len = vector.length
-    output.writeInt(len, true)
-
-    // Interrogate vec properties
-    val dense = vector.isDense
-    val sequential = vector.isSequentialAccess
-    val named = vector.isInstanceOf[NamedVector]
-
-    // Dense wins over sequential: the sequential bit is only meaningful for sparse payloads.
-    var flag = 0
-    if (dense) {
-      flag |= FLAG_DENSE
-    } else if (sequential) {
-      flag |= FLAG_SEQUENTIAL
-    }
-    if (named) flag |= FLAG_NAMED
-    if (laxPrecision) flag |= FLAG_LAX_PRECISION
-
-    // Write flags
-    output.writeByte(flag)
-
-    // Write name if needed
-    if (named) output.writeString(vector.asInstanceOf[NamedVector].getName)
-
-    if (dense) {
-
-      // Dense vector.
-      if (laxPrecision) {
-        for (i <- 0 until len) output.writeFloat(vector(i).toFloat)
-      } else {
-        for (i <- 0 until len) output.writeDouble(vector(i))
-      }
-
-    } else {
-
-      // Turns out getNumNonZeroElements must check every element if it is indeed non-zero. The
-      // iterateNonZeros() on the other hand doesn't do that, so that's all inconsistent right
-      // now. so we'll just auto-terminate.
-      //
-      // The filter must test the value exactly as it is going to be written: in lax-precision mode a
-      // tiny non-zero double narrows to 0.0f, which the reader would take for the terminator and
-      // stop early, leaving the rest of the stream misaligned.
-      val iter = vector.nonZeroes.toIterator.filter { el =>
-        if (laxPrecision) el.get().toFloat != 0.0f else el.get() != 0.0
-      }
-
-      if (sequential) {
-
-        // Delta encoding: each index is stored as the distance from the previously written index.
-        var idx = 0
-        if (laxPrecision) {
-          while (iter.hasNext) {
-            val el = iter.next()
-            output.writeFloat(el.get().toFloat)
-            output.writeInt(el.index() - idx, true)
-            idx = el.index()
-          }
-          // Terminate delta encoding.
-          output.writeFloat(0.0f)
-        } else {
-          while (iter.hasNext) {
-            val el = iter.next()
-            output.writeDouble(el.get())
-            output.writeInt(el.index() - idx, true)
-            idx = el.index()
-          }
-          // Terminate delta encoding.
-          output.writeDouble(0.0)
-        }
-
-      } else {
-
-        // Random access: absolute indices.
-        if (laxPrecision) {
-          iter.foreach { el =>
-            output.writeFloat(el.get().toFloat)
-            output.writeInt(el.index(), true)
-          }
-          // Terminate random access with 0.0 value.
-          output.writeFloat(0.0f)
-        } else {
-          iter.foreach { el =>
-            output.writeDouble(el.get())
-            output.writeInt(el.index(), true)
-          }
-          // Terminate random access with 0.0 value.
-          output.writeDouble(0.0)
-        }
-      }
-    }
-  }
-
-  /**
-   * Reads a vector written by `write`. The flag byte in the stream, not this instance's `laxPrecision`,
-   * decides how values are decoded; `vecClass` is not consulted.
-   *
-   * @return a DenseVector, SequentialAccessSparseVector or RandomAccessSparseVector, wrapped in a
-   *         NamedVector when a name was stored
-   */
-  override def read(kryo: Kryo, input: Input, vecClass: Class[Vector]): Vector = {
-
-    val len = input.readInt(true)
-    val flags = input.readByte().toInt
-    val name = if ((flags & FLAG_NAMED) != 0) Some(input.readString()) else None
-
-    val lax = (flags & FLAG_LAX_PRECISION) != 0
-
-    // Next sparse value, widened to Double when it was stored as a float.
-    def nextValue(): Double = if (lax) input.readFloat() else input.readDouble()
-
-    val vec: Vector =
-      if ((flags & FLAG_DENSE) != 0) {
-
-        trace(s"Deserializing dense vector.")
-
-        if (lax) {
-          new DenseVector(len) := { _ => input.readFloat()}
-        } else {
-          new DenseVector(len) := { _ => input.readDouble()}
-        }
-
-      } else if ((flags & FLAG_SEQUENTIAL) != 0) {
-
-        trace("Deserializing as sequential sparse vector.")
-
-        // Indices are stored as deltas; accumulate them until the 0.0 terminator shows up.
-        val v = new SequentialAccessSparseVector(len)
-        var idx = 0
-        var stop = false
-        while (!stop) {
-          val value = nextValue()
-          if (value == 0.0) {
-            stop = true
-          } else {
-            idx += input.readInt(true)
-            v(idx) = value
-          }
-        }
-        v
-
-      } else {
-
-        trace("Deserializing as random access vector.")
-
-        // Read pairs until we see 0.0 value. Prone to corruption attacks obviously.
-        val v = new RandomAccessSparseVector(len)
-        var stop = false
-        while (!stop) {
-          val value = nextValue()
-          if (value == 0.0) {
-            stop = true
-          } else {
-            val idx = input.readInt(true)
-            v(idx) = value
-          }
-        }
-        v
-      }
-
-    name.map{name =>
-
-      trace(s"Recovering named vector's name $name.")
-
-      new NamedVector(vec, name)
-    }
-      .getOrElse(vec)
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala b/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
deleted file mode 100644
index 32515f1..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.drivers
-
-import org.apache.mahout.math.drm.DistributedContext
-
-/**
- * Extended by a platform specific version of this class to create a Mahout CLI driver.
- *
- * Intended call sequence, per the member docs below: `main` parses the command line and calls `process`,
- * which calls `start` first and `stop` before exiting.
- */
-abstract class MahoutDriver {
-
-  /** Distributed context of the driver; stays null until `start` assigns it. */
-  implicit protected var mc: DistributedContext = _
-
-  /** Option parser of the driver; stays null until a subclass assigns it. */
-  implicit protected var parser: MahoutOptionParser = _
-
-  var _useExistingContext: Boolean = false // used in the test suite to reuse one context per suite
-
-  /** must be overridden to setup the DistributedContext mc */
-  protected def start() : Unit
-
-  /**
-   * Override (optionally) for special cleanup.
-   *
-   * Closes `mc` unless an existing context is being reused. The null check makes `stop` safe to call
-   * when `start` never ran or failed before assigning `mc`.
-   */
-  protected def stop(): Unit = {
-    if (!_useExistingContext && mc != null) mc.close()
-  }
-
-  /** This is where you do the work, call start first, then before exiting call stop */
-  protected def process(): Unit
-
-  /** Parse command line and call process */
-  def main(args: Array[String]): Unit
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala b/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala
deleted file mode 100644
index d3723a2..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.drivers
-
-import scopt.OptionParser
-
-import scala.collection.immutable
-
-/**
- * Defines oft-repeated options and their parsing. Provides the option groups and parsing helper methods to
- * keep both standardized.
- * @param programName Name displayed in help message, the name by which the driver is invoked.
- * @note options are engine neutral by convention. See the engine specific extending class
- *       to add Spark or other engine options.
- */
-class MahoutOptionParser(programName: String) extends OptionParser[Map[String, Any]](programName: String) {
-
-  // Build options from some standard CLI param groups; every parse* method merges its group's defaults in here.
-  // Note: always put the driver specific options at the last so they can override any previous options!
-  var opts = Map.empty[String, Any]
-
-  override def showUsageOnError = true
-
-  /**
-   * Registers --input, --output and, only when `numInputs == 2`, --input2; merges the FileIOOptions defaults.
-   */
-  def parseIOOptions(numInputs: Int = 1) = {
-    opts = opts ++ MahoutOptionParser.FileIOOptions
-    note("Input, output options")
-    opt[String]('i', "input") required() action { (x, options) =>
-      options + ("input" -> x)
-    } text ("Input path, may be a filename, directory name, or comma delimited list of HDFS supported URIs" +
-      " (required)")
-
-    if (numInputs == 2) {
-      opt[String]("input2") abbr ("i2") action { (x, options) =>
-        options + ("input2" -> x)
-      } text ("Secondary input path for cross-similarity calculation, same restrictions as \"--input\" " +
-        "(optional). Default: empty.")
-    }
-
-    opt[String]('o', "output") required() action { (x, options) =>
-      // The stored output path always ends with "/": one is appended when the user left it off.
-      val dir = if (x.endsWith("/")) x else x + "/"
-      options + ("output" -> dir)
-    } text ("Path for output directory, any HDFS supported URI (required)")
-  }
-
-  /** Registers --randomSeed (must be > 0) and the hidden --writeAllDatasets flag; merges GenericOptions. */
-  def parseGenericOptions() = {
-    opts = opts ++ MahoutOptionParser.GenericOptions
-    opt[Int]("randomSeed") abbr ("rs") action { (x, options) =>
-      options + ("randomSeed" -> x)
-    } validate { x =>
-      if (x > 0) success else failure("Option --randomSeed must be > 0")
-    }
-    // Hidden option, though a user might want this: output both input IndexedDatasets
-    opt[Unit]("writeAllDatasets") hidden() action { (_, options) =>
-      options + ("writeAllDatasets" -> true)
-    }
-  }
-
-  /** Registers delimiter, filter and column options for element input, plus two consistency checks. */
-  def parseElementInputSchemaOptions() = {
-    //Input text file schema--not driver specific but input data specific, elements input,
-    // not rows of IndexedDatasets
-    opts = opts ++ MahoutOptionParser.TextDelimitedElementsOptions
-    note("\nInput text file schema options:")
-    opt[String]("inDelim") abbr ("id") text ("Input delimiter character (optional). Default: \"[ ,\\t]\"") action {
-      (x, options) =>
-        options + ("inDelim" -> x)
-    }
-
-    opt[String]("filter1") abbr ("f1") action { (x, options) =>
-      options + ("filter1" -> x)
-    } text ("String (or regex) whose presence indicates a datum for the primary item set (optional). " +
-      "Default: no filter, all data is used")
-
-    opt[String]("filter2") abbr ("f2") action { (x, options) =>
-      options + ("filter2" -> x)
-    } text ("String (or regex) whose presence indicates a datum for the secondary item set (optional). " +
-      "If not present no secondary dataset is collected")
-
-    opt[Int]("rowIDColumn") abbr ("rc") action { (x, options) =>
-      options + ("rowIDColumn" -> x)
-    } text ("Column number (0 based Int) containing the row ID string (optional). Default: 0") validate {
-      x =>
-        if (x >= 0) success else failure("Option --rowIDColumn must be >= 0")
-    }
-
-    opt[Int]("itemIDColumn") abbr ("ic") action { (x, options) =>
-      options + ("itemIDColumn" -> x)
-    } text ("Column number (0 based Int) containing the item ID string (optional). Default: 1") validate {
-      x =>
-        if (x >= 0) success else failure("Option --itemIDColumn must be >= 0")
-    }
-
-    opt[Int]("filterColumn") abbr ("fc") action { (x, options) =>
-      options + ("filterColumn" -> x)
-    } text ("Column number (0 based Int) containing the filter string (optional). Default: -1 for no " +
-      "filter") validate { x =>
-      if (x >= -1) success else failure("Option --filterColumn must be >= -1")
-    }
-
-    note("\nUsing all defaults the input is expected of the form: \"userID<tab>itemId\" or" +
-      " \"userID<tab>itemID<tab>any-text...\" and all rows will be used")
-
-    //check for column consistency: no two of the three column positions may coincide
-    checkConfig { options: Map[String, Any] =>
-      if (options("filterColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int]
-        || options("filterColumn").asInstanceOf[Int] == options("rowIDColumn").asInstanceOf[Int]
-        || options("rowIDColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int])
-        failure("The row, item, and filter positions must be unique.") else success
-    }
-
-    //check for filter consistency: two filters that are both set must differ
-    checkConfig { options: Map[String, Any] =>
-      if (options("filter1").asInstanceOf[String] != null.asInstanceOf[String]
-        && options("filter2").asInstanceOf[String] != null.asInstanceOf[String]
-        && options("filter1").asInstanceOf[String] == options("filter2").asInstanceOf[String])
-        failure ("If using filters they must be unique.") else success
-    }
-  }
-
-  /** Registers --recursive and --filenamePattern; merges the FileDiscoveryOptions defaults into `opts`. */
-  def parseFileDiscoveryOptions() = {
-    //File finding strategy--not driver specific
-    opts = opts ++ MahoutOptionParser.FileDiscoveryOptions
-    note("\nFile discovery options:")
-    opt[Unit]('r', "recursive") action { (_, options) =>
-      options + ("recursive" -> true)
-    } text ("Searched the -i path recursively for files that match --filenamePattern (optional), Default: false")
-
-    opt[String]("filenamePattern") abbr ("fp") action { (x, options) =>
-      options + ("filenamePattern" -> x)
-    } text ("Regex to match in determining input files (optional). Default: filename in the --input option " +
-      "or \"^part-.*\" if --input is a directory")
-  }
-
-  /** Registers the delimiter and --omitStrength options under the heading `notice`; merges their defaults. */
-  def parseIndexedDatasetFormatOptions(notice: String = "\nOutput text file schema options:") = {
-    opts = opts ++ MahoutOptionParser.TextDelimitedIndexedDatasetOptions
-    note(notice)
-    opt[String]("rowKeyDelim") abbr ("rd") action { (x, options) =>
-      options + ("rowKeyDelim" -> x)
-    } text ("Separates the rowID key from the vector values list (optional). Default: \"\\t\"")
-
-    opt[String]("columnIdStrengthDelim") abbr ("cd") action { (x, options) =>
-      options + ("columnIdStrengthDelim" -> x)
-    } text ("Separates column IDs from their values in the vector values list (optional). Default: \":\"")
-
-    opt[String]("elementDelim") abbr ("td") action { (x, options) =>
-      options + ("elementDelim" -> x)
-    } text ("Separates vector element values in the values list (optional). Default: \" \"")
-
-    opt[Unit]("omitStrength") abbr ("os") action { (_, options) =>
-      options + ("omitStrength" -> true)
-    } text ("Do not write the strength to the output files (optional), Default: false.")
-    note("This option is used to output indexable data for creating a search engine recommender.")
-    note("\nDefault delimiters will produce output of the form: " +
-      "\"itemID1<tab>itemID2:value2<space>itemID10:value10...\"")
-  }
-
-}
-
-/**
- * Default values for each option group, keyed by option name, for use by any driver that needs them.
- * @note not all options are platform neutral so other platforms can add default options here if desired
- */
-object MahoutOptionParser {
-
-  // One immutable map of defaults per option group; a null value marks an option that has no default.
-  final val GenericOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "randomSeed" -> System.currentTimeMillis().toInt, // Long truncated to Int, so the value may be negative
-    "writeAllDatasets" -> false)
-
-  final val SparkOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "master" -> "local",
-    "sparkExecutorMem" -> "",
-    "appName" -> "Generic Spark App, Change this.")
-
-  final val FileIOOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "input" -> (null: String),
-    "input2" -> (null: String),
-    "output" -> (null: String))
-
-  final val FileDiscoveryOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "recursive" -> false,
-    "filenamePattern" -> "^part-.*")
-
-  final val TextDelimitedElementsOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "rowIDColumn" -> 0,
-    "itemIDColumn" -> 1,
-    "filterColumn" -> -1,
-    "filter1" -> (null: String),
-    "filter2" -> (null: String),
-    "inDelim" -> "[,\t ]")
-
-  final val TextDelimitedIndexedDatasetOptions: immutable.HashMap[String, Any] = immutable.HashMap(
-    "rowKeyDelim" -> "\t",
-    "columnIdStrengthDelim" -> ":",
-    "elementDelim" -> " ",
-    "omitStrength" -> false)
-}
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/logging/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/logging/package.scala b/math-scala/src/main/scala/org/apache/mahout/logging/package.scala
deleted file mode 100644
index 15aa909..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/logging/package.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout
-
-import org.apache.log4j.{Level, Priority, Logger}
-
-package object logging {
-
-  /** Computes `expr` only when DEBUG is enabled on the implicit logger; returns None otherwise. */
-  def debugDo[T](expr: => T)(implicit log: Logger): Option[T] = {
-    if (log.isDebugEnabled) Some(expr)
-    else None
-  }
-
-  /** Computes `expr` only when TRACE is enabled on the implicit logger; returns None otherwise. */
-  def traceDo[T](expr: => T)(implicit log: Logger): Option[T] = {
-    if (log.isTraceEnabled) Some(expr) else None
-  }
-
-  /** Shorter, and lazy, versions of logging methods: `msg` is by-name. Just declare log implicit. */
-  def debug(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isDebugEnabled) log.debug(msg) }
-  def debug(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isDebugEnabled()) log.debug(msg, t) }
-
-  /** Lazy TRACE logging; `msg` is evaluated only when TRACE is enabled. */
-  def trace(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isTraceEnabled) log.trace(msg) }
-  def trace(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isTraceEnabled()) log.trace(msg, t) }
-
-  /** Lazy INFO logging; `msg` is evaluated only when INFO is enabled. */
-  def info(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isInfoEnabled) log.info(msg) }
-  def info(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isInfoEnabled) log.info(msg, t) }
-
-  /** Lazy WARN logging; the throwable variant logs at WARN as well (it used to be routed to `error`). */
-  def warn(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.WARN)) log.warn(msg) }
-  def warn(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.WARN)) log.warn(msg, t) }
-
-  /** Lazy ERROR logging; both variants delegate to the logger (never to these helpers, which would recurse). */
-  def error(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.ERROR)) log.error(msg) }
-  def error(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.ERROR)) log.error(msg, t) }
-
-  /** Lazy FATAL logging; `msg` is evaluated only when FATAL is enabled. */
-  def fatal(msg: => AnyRef)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.FATAL)) log.fatal(msg) }
-  def fatal(msg: => AnyRef, t: Throwable)(implicit log: Logger): Unit = { if (log.isEnabledFor(Level.FATAL)) log.fatal(msg, t) }
-
-  /** Looks up a logger by name or by class through `Logger.getLogger`. */
-  def getLog(name: String): Logger = Logger.getLogger(name)
-  def getLog(clazz: Class[_]): Logger = Logger.getLogger(clazz)
-
-  /** The logger named "org.apache.mahout". */
-  def mahoutLog: Logger = getLog("org.apache.mahout")
-
-  /** Sets the level of the implicit logger. */
-  def setLogLevel(l: Level)(implicit log: Logger): Unit = {
-    log.setLevel(l)
-  }
-
-  /** Sets the additivity flag of the implicit logger. */
-  def setAdditivity(a: Boolean)(implicit log: Logger): Unit = log.setAdditivity(a)
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
deleted file mode 100644
index 244cefc..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-/**
-  * Root trait of all fitters. All models must have a fit method, but its signature changes from one
-  * kind of fitter to the next, so none is declared here. Left as a placeholder in case we decide
-  * there are some things all Models must have in common.
-  */
-trait Fitter

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
deleted file mode 100644
index 0fbe8ac..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-/** Base trait of all models: serializable, with a mutable text `summary`. */
-trait Model extends Serializable {
-  /** Text summary of the model; starts out as the empty string. */
-  var summary: String = ""
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
deleted file mode 100644
index bf85dee..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-import org.apache.mahout.math.drm.DrmLike
-
-/** Fitter that produces a supervised model of type `M` from two DRMs sharing the key type `K`. */
-trait SupervisedFitter[K, M <: SupervisedModel[K]] extends Fitter {
-
-  /** Fits a model from `drmX` and `drmTarget`; `hyperparameters` are optional (Symbol, value) pairs. */
-  def fit(drmX: DrmLike[K], drmTarget: DrmLike[K], hyperparameters: (Symbol, Any)*): M
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
deleted file mode 100644
index 57c20e7..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-import scala.collection.mutable
-
-trait SupervisedModel[K] extends Model {
-  var testResults: mutable.Map[Symbol, Any] = mutable.Map.empty // mutable results keyed by Symbol; empty at first
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
deleted file mode 100644
index 5c191d1..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-import org.apache.mahout.math.drm.DrmLike
-
-/** Fitter that produces an [[UnsupervisedModel]] from a single input DRM. */
-trait UnsupervisedFitter extends Fitter {
-  /** Fits a model from `input`; `hyperparameters` are optional (Symbol, value) pairs. */
-  def fit[K](input: DrmLike[K], hyperparameters: (Symbol, Any)*): UnsupervisedModel
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
deleted file mode 100644
index f8ff341..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one
-  * or more contributor license agreements. See the NOTICE file
-  * distributed with this work for additional information
-  * regarding copyright ownership. The ASF licenses this file
-  * to you under the Apache License, Version 2.0 (the
-  * "License"); you may not use this file except in compliance
-  * with the License. You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing,
-  * software distributed under the License is distributed on an
-  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  * KIND, either express or implied. See the License for the
-  * specific language governing permissions and limitations
-  * under the License.
-  */
-
-package org.apache.mahout.math.algorithms
-
-/** Marker trait for unsupervised models;
-  * declares no members beyond those inherited from Model. */
-trait UnsupervisedModel extends Model