Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 75E47200CB4 for ; Tue, 27 Jun 2017 18:14:39 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 746AB160BFF; Tue, 27 Jun 2017 16:14:39 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 85278160BFD for ; Tue, 27 Jun 2017 18:14:36 +0200 (CEST) Received: (qmail 86931 invoked by uid 500); 27 Jun 2017 16:14:30 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 85162 invoked by uid 99); 27 Jun 2017 16:14:29 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 27 Jun 2017 16:14:29 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 5D7CBED4A2; Tue, 27 Jun 2017 16:14:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: vanstee@apache.org To: commits@mahout.apache.org Date: Tue, 27 Jun 2017 16:14:42 -0000 Message-Id: <017e6bb648084c4f9457d0d34992e468@git.apache.org> In-Reply-To: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org> References: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [17/52] [partial] mahout git commit: removed all files except for website directory archived-at: Tue, 27 Jun 2017 16:14:39 -0000 http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala 
---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala deleted file mode 100644 index 36fc551..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.classifier.stats.{ResultAnalyzer, ClassifierResult} -import org.apache.mahout.math._ -import scalabindings._ -import scalabindings.RLikeOps._ -import drm.RLikeDrmOps._ -import drm._ -import scala.reflect.ClassTag -import scala.language.asInstanceOf -import collection._ -import scala.collection.JavaConversions._ - -/** - * Distributed training of a Naive Bayes model. 
Follows the approach presented in Rennie et.al.: Tackling the poor - * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf - */ -trait NaiveBayes extends java.io.Serializable{ - - /** default value for the Laplacian smoothing parameter */ - def defaultAlphaI = 1.0f - - // function to extract categories from string keys - type CategoryParser = String => String - - /** Default: seqdirectory/seq2Sparse Categories are Stored in Drm Keys as: /Category/document_id */ - def seq2SparseCategoryParser: CategoryParser = x => x.split("/")(1) - - - /** - * Distributed training of a Naive Bayes model. Follows the approach presented in Rennie et.al.: Tackling the poor - * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf - * - * @param observationsPerLabel a DrmLike[Int] matrix containing term frequency counts for each label. - * @param trainComplementary whether or not to train a complementary Naive Bayes model - * @param alphaI Laplace smoothing parameter - * @return trained naive bayes model - */ - def train(observationsPerLabel: DrmLike[Int], - labelIndex: Map[String, Integer], - trainComplementary: Boolean = true, - alphaI: Float = defaultAlphaI): NBModel = { - - // Summation of all weights per feature - val weightsPerFeature = observationsPerLabel.colSums - - // Distributed summation of all weights per label - val weightsPerLabel = observationsPerLabel.rowSums - - // Collect a matrix to pass to the NaiveBayesModel - val inCoreTFIDF = observationsPerLabel.collect - - // perLabelThetaNormalizer Vector is expected by NaiveBayesModel. We can pass a null value - // or Vector of zeroes in the case of a standard NB model. 
- var thetaNormalizer = weightsPerFeature.like() - - // Instantiate a trainer and retrieve the perLabelThetaNormalizer Vector from it in the case of - // a complementary NB model - if (trainComplementary) { - val thetaTrainer = new ComplementaryNBThetaTrainer(weightsPerFeature, - weightsPerLabel, - alphaI) - // local training of the theta normalization - for (labelIndex <- 0 until inCoreTFIDF.nrow) { - thetaTrainer.train(labelIndex, inCoreTFIDF(labelIndex, ::)) - } - thetaNormalizer = thetaTrainer.retrievePerLabelThetaNormalizer - } - - new NBModel(inCoreTFIDF, - weightsPerFeature, - weightsPerLabel, - thetaNormalizer, - labelIndex, - alphaI, - trainComplementary) - } - - /** - * Extract label Keys from raw TF or TF-IDF Matrix generated by seqdirectory/seq2sparse - * and aggregate TF or TF-IDF values by their label - * Override this method in engine specific modules to optimize - * - * @param stringKeyedObservations DrmLike matrix; Output from seq2sparse - * in form K = eg./Category/document_title - * V = TF or TF-IDF values per term - * @param cParser a String => String function used to extract categories from - * Keys of the stringKeyedObservations DRM. The default - * CategoryParser will extract "Category" from: '/Category/document_id' - * @return (labelIndexMap,aggregatedByLabelObservationDrm) - * labelIndexMap is a HashMap [String, Integer] K = label row index - * V = label - * aggregatedByLabelObservationDrm is a DrmLike[Int] of aggregated - * TF or TF-IDF counts per label - */ - def extractLabelsAndAggregateObservations[K](stringKeyedObservations: DrmLike[K], - cParser: CategoryParser = seq2SparseCategoryParser) - (implicit ctx: DistributedContext): - (mutable.HashMap[String, Integer], DrmLike[Int])= { - - stringKeyedObservations.checkpoint() - - val numDocs=stringKeyedObservations.nrow - val numFeatures=stringKeyedObservations.ncol - - // For mapblocks that return K. 
- implicit val ktag = stringKeyedObservations.keyClassTag - - // Extract categories from labels assigned by seq2sparse - // Categories are Stored in Drm Keys as eg.: /Category/document_id - - // Get a new DRM with a single column so that we don't have to collect the - // DRM into memory upfront. - val strippedObeservations = stringKeyedObservations.mapBlock(ncol = 1) { - case (keys, block) => - val blockB = block.like(keys.size, 1) - keys -> blockB - } - - // Extract the row label bindings (the String keys) from the slim Drm - // strip the document_id from the row keys keeping only the category. - // Sort the bindings alphabetically into a Vector - val labelVectorByRowIndex = strippedObeservations - .getRowLabelBindings - .map(x => x._2 -> cParser(x._1)) - .toVector.sortWith(_._1 < _._1) - - //TODO: add a .toIntKeyed(...) method to DrmLike? - - // Copy stringKeyedObservations to an Int-Keyed Drm so that we can compute transpose - // Copy the Collected Matrices up front for now until we hav a distributed way of converting - val inCoreStringKeyedObservations = stringKeyedObservations.collect - val inCoreIntKeyedObservations = new SparseMatrix( - stringKeyedObservations.nrow.toInt, - stringKeyedObservations.ncol) - for (i <- 0 until inCoreStringKeyedObservations.nrow) { - inCoreIntKeyedObservations(i, ::) = inCoreStringKeyedObservations(i, ::) - } - - val intKeyedObservations= drmParallelize(inCoreIntKeyedObservations) - - stringKeyedObservations.uncache() - - var labelIndex = 0 - val labelIndexMap = new mutable.HashMap[String, Integer] - val encodedLabelByRowIndexVector = new DenseVector(labelVectorByRowIndex.size) - - // Encode Categories as an Integer (Double) so we can broadcast as a vector - // where each element is an Int-encoded category whose index corresponds - // to its row in the Drm - for (i <- labelVectorByRowIndex.indices) { - if (!labelIndexMap.contains(labelVectorByRowIndex(i)._2)) { - encodedLabelByRowIndexVector(i) = labelIndex.toDouble - 
labelIndexMap.put(labelVectorByRowIndex(i)._2, labelIndex) - labelIndex += 1 - } - // don't like this casting but need to use a java.lang.Integer when setting rowLabelBindings - encodedLabelByRowIndexVector(i) = labelIndexMap - .getOrElse(labelVectorByRowIndex(i)._2, -1) - .asInstanceOf[Int].toDouble - } - - // "Combiner": Map and aggregate by Category. Do this by broadcasting the encoded - // category vector and mapping a transposed IntKeyed Drm out so that all categories - // will be present on all nodes as columns and can be referenced by - // BCastEncodedCategoryByRowVector. Iteratively sum all categories. - val nLabels = labelIndex - - val bcastEncodedCategoryByRowVector = drmBroadcast(encodedLabelByRowIndexVector) - - val aggregetedObservationByLabelDrm = intKeyedObservations.t.mapBlock(ncol = nLabels) { - case (keys, blockA) => - val blockB = blockA.like(keys.size, nLabels) - var label : Int = 0 - for (i <- 0 until keys.size) { - blockA(i, ::).nonZeroes().foreach { elem => - label = bcastEncodedCategoryByRowVector.get(elem.index).toInt - blockB(i, label) = blockB(i, label) + blockA(i, elem.index) - } - } - keys -> blockB - }.t - - (labelIndexMap, aggregetedObservationByLabelDrm) - } - - /** - * Test a trained model with a labeled dataset sequentially - * @param model a trained NBModel - * @param testSet a labeled testing set - * @param testComplementary test using a complementary or a standard NB classifier - * @param cParser a String => String function used to extract categories from - * Keys of the testing set DRM. 
The default - * CategoryParser will extract "Category" from: '/Category/document_id' - * - * *Note*: this method brings the entire test set into upfront memory, - * This method is optimized and parallelized in SparkNaiveBayes - * - * @tparam K implicitly determined Key type of test set DRM: String - * @return a result analyzer with confusion matrix and accuracy statistics - */ - def test[K: ClassTag](model: NBModel, - testSet: DrmLike[K], - testComplementary: Boolean = false, - cParser: CategoryParser = seq2SparseCategoryParser) - (implicit ctx: DistributedContext): ResultAnalyzer = { - - val labelMap = model.labelIndex - - val numLabels = model.numLabels - - testSet.checkpoint() - - val numTestInstances = testSet.nrow.toInt - - // instantiate the correct type of classifier - val classifier = testComplementary match { - case true => new ComplementaryNBClassifier(model) with Serializable - case _ => new StandardNBClassifier(model) with Serializable - } - - if (testComplementary) { - assert(testComplementary == model.isComplementary, - "Complementary Label Assignment requires Complementary Training") - } - - - // Sequentially assign labels to the test set: - // *Note* this brings the entire test set into memory upfront: - - // Since we cant broadcast the model as is do it sequentially up front for now - val inCoreTestSet = testSet.collect - - // get the labels of the test set and extract the keys - val testSetLabelMap = testSet.getRowLabelBindings - - // empty Matrix in which we'll set the classification scores - val inCoreScoredTestSet = testSet.like(numTestInstances, numLabels) - - testSet.uncache() - - for (i <- 0 until numTestInstances) { - inCoreScoredTestSet(i, ::) := classifier.classifyFull(inCoreTestSet(i, ::)) - } - - // todo: reverse the labelMaps in training and through the model? 
- - // reverse the label map and extract the labels - val reverseTestSetLabelMap = testSetLabelMap.map(x => x._2 -> cParser(x._1)) - - val reverseLabelMap = labelMap.map(x => x._2 -> x._1) - - val analyzer = new ResultAnalyzer(labelMap.keys.toList.sorted, "DEFAULT") - - // assign labels- winner takes all - for (i <- 0 until numTestInstances) { - val (bestIdx, bestScore) = argmax(inCoreScoredTestSet(i, ::)) - val classifierResult = new ClassifierResult(reverseLabelMap(bestIdx), bestScore) - analyzer.addInstance(reverseTestSetLabelMap(i), classifierResult) - } - - analyzer - } - - /** - * argmax with values as well - * returns a tuple of index of the max score and the score itself. - * @param v Vector of of scores - * @return (bestIndex, bestScore) - */ - def argmax(v: Vector): (Int, Double) = { - var bestIdx: Int = Integer.MIN_VALUE - var bestScore: Double = Integer.MIN_VALUE.toDouble - for(i <- 0 until v.size) { - if(v(i) > bestScore){ - bestScore = v(i) - bestIdx = i - } - } - (bestIdx, bestScore) - } - -} - -object NaiveBayes extends NaiveBayes with java.io.Serializable - -/** - * Trainer for the weight normalization vector used by Transform Weight Normalized Complement - * Naive Bayes. See: Rennie et.al.: Tackling the poor assumptions of Naive Bayes Text classifiers, - * ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf Sec. 3.2. - * - * @param weightsPerFeature a Vector of summed TF or TF-IDF weights for each word in dictionary. - * @param weightsPerLabel a Vector of summed TF or TF-IDF weights for each label. - * @param alphaI Laplace smoothing factor. Defaut value of 1. 
- */ -class ComplementaryNBThetaTrainer(private val weightsPerFeature: Vector, - private val weightsPerLabel: Vector, - private val alphaI: Double = 1.0) { - - private val perLabelThetaNormalizer: Vector = weightsPerLabel.like() - private val totalWeightSum: Double = weightsPerLabel.zSum - private val numFeatures: Double = weightsPerFeature.getNumNondefaultElements - - assert(weightsPerFeature != null, "weightsPerFeature vector can not be null") - assert(weightsPerLabel != null, "weightsPerLabel vector can not be null") - - /** - * Train the weight normalization vector for each label - * @param label - * @param featurePerLabelWeight - */ - def train(label: Int, featurePerLabelWeight: Vector) { - val currentLabelWeight = labelWeight(label) - // sum weights for each label including those with zero word counts - for (i <- 0 until featurePerLabelWeight.size) { - val currentFeaturePerLabelWeight = featurePerLabelWeight(i) - updatePerLabelThetaNormalizer(label, - ComplementaryNBClassifier.computeWeight(featureWeight(i), - currentFeaturePerLabelWeight, - totalWeightSum, - currentLabelWeight, - alphaI, - numFeatures) - ) - } - } - - /** - * getter for summed TF or TF-IDF weights by label - * @param label index of label - * @return sum of word TF or TF-IDF weights for label - */ - def labelWeight(label: Int): Double = { - weightsPerLabel(label) - } - - /** - * getter for summed TF or TF-IDF weights by word. - * @param feature index of word. - * @return sum of TF or TF-IDF weights for word. - */ - def featureWeight(feature: Int): Double = { - weightsPerFeature(feature) - } - - /** - * add the magnitude of the current weight to the current - * label's corresponding Vector element. - * @param label index of label to update. - * @param weight weight to add. 
- */ - def updatePerLabelThetaNormalizer(label: Int, weight: Double) { - perLabelThetaNormalizer(label) = perLabelThetaNormalizer(label) + Math.abs(weight) - } - - /** - * Getter for the weight normalizer vector as indexed by label - * @return a copy of the weight normalizer vector. - */ - def retrievePerLabelThetaNormalizer: Vector = { - perLabelThetaNormalizer.cloned - } - - - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala deleted file mode 100644 index 8f1413a..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala +++ /dev/null @@ -1,467 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package org.apache.mahout.classifier.stats - -import java.text.{DecimalFormat, NumberFormat} -import java.util -import org.apache.mahout.math.stats.OnlineSummarizer - - -/** - * Result of a document classification. 
The label and the associated score (usually probabilty) - */ -class ClassifierResult (private var label: String = null, - private var score: Double = 0.0, - private var logLikelihood: Double = Integer.MAX_VALUE.toDouble) { - - def getLogLikelihood: Double = logLikelihood - - def setLogLikelihood(llh: Double) { - logLikelihood = llh - } - - def getLabel: String = label - - def getScore: Double = score - - def setLabel(lbl: String) { - label = lbl - } - - def setScore(sc: Double) { - score = sc - } - - override def toString: String = { - "ClassifierResult{" + "category='" + label + '\'' + ", score=" + score + '}' - } - -} - -/** - * ResultAnalyzer captures the classification statistics and displays in a tabular manner - * @param labelSet Set of labels to be considered in classification - * @param defaultLabel the default label for an unknown classification - */ -class ResultAnalyzer(private val labelSet: util.Collection[String], defaultLabel: String) { - - val confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel) - val summarizer = new OnlineSummarizer - - private var hasLL: Boolean = false - private var correctlyClassified: Int = 0 - private var incorrectlyClassified: Int = 0 - - - def getConfusionMatrix: ConfusionMatrix = confusionMatrix - - /** - * - * @param correctLabel - * The correct label - * @param classifiedResult - * The classified result - * @return whether the instance was correct or not - */ - def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Boolean = { - val result: Boolean = correctLabel == classifiedResult.getLabel - if (result) { - correctlyClassified += 1 - } - else { - incorrectlyClassified += 1 - } - confusionMatrix.addInstance(correctLabel, classifiedResult) - if (classifiedResult.getLogLikelihood != Integer.MAX_VALUE.toDouble) { - summarizer.add(classifiedResult.getLogLikelihood) - hasLL = true - } - - result - } - - /** Dump the resulting statistics to a string */ - override def toString: String = { - val 
returnString: StringBuilder = new StringBuilder - returnString.append('\n') - returnString.append("=======================================================\n") - returnString.append("Summary\n") - returnString.append("-------------------------------------------------------\n") - val totalClassified: Int = correctlyClassified + incorrectlyClassified - val percentageCorrect: Double = 100.asInstanceOf[Double] * correctlyClassified / totalClassified - val percentageIncorrect: Double = 100.asInstanceOf[Double] * incorrectlyClassified / totalClassified - val decimalFormatter: NumberFormat = new DecimalFormat("0.####") - returnString.append("Correctly Classified Instances") - .append(": ") - .append(Integer.toString(correctlyClassified)) - .append('\t') - .append(decimalFormatter.format(percentageCorrect)) - .append("%\n") - returnString.append("Incorrectly Classified Instances") - .append(": ") - .append(Integer.toString(incorrectlyClassified)) - .append('\t') - .append(decimalFormatter.format(percentageIncorrect)) - .append("%\n") - returnString.append("Total Classified Instances") - .append(": ") - .append(Integer.toString(totalClassified)) - .append('\n') - returnString.append('\n') - returnString.append(confusionMatrix) - returnString.append("=======================================================\n") - returnString.append("Statistics\n") - returnString.append("-------------------------------------------------------\n") - val normStats: RunningAverageAndStdDev = confusionMatrix.getNormalizedStats - returnString.append("Kappa: \t") - .append(decimalFormatter.format(confusionMatrix.getKappa)) - .append('\n') - returnString.append("Accuracy: \t") - .append(decimalFormatter.format(confusionMatrix.getAccuracy)) - .append("%\n") - returnString.append("Reliability: \t") - .append(decimalFormatter.format(normStats.getAverage * 100.00000001)) - .append("%\n") - returnString.append("Reliability (std dev): \t") - .append(decimalFormatter.format(normStats.getStandardDeviation)) - 
.append('\n') - returnString.append("Weighted precision: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedPrecision)) - .append('\n') - returnString.append("Weighted recall: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedRecall)) - .append('\n') - returnString.append("Weighted F1 score: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedF1score)) - .append('\n') - if (hasLL) { - returnString.append("Log-likelihood: \t") - .append("mean : \t") - .append(decimalFormatter.format(summarizer.getMean)) - .append('\n') - returnString.append("25%-ile : \t") - .append(decimalFormatter.format(summarizer.getQuartile(1))) - .append('\n') - returnString.append("75%-ile : \t") - .append(decimalFormatter.format(summarizer.getQuartile(3))) - .append('\n') - } - - returnString.toString() - } - - -} - -/** - * - * Interface for classes that can keep track of a running average of a series of numbers. One can add to or - * remove from the series, as well as update a datum in the series. The class does not actually keep track of - * the series of values, just its running average, so it doesn't even matter if you remove/change a value that - * wasn't added. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverage.java - */ -trait RunningAverage { - - /** - * @param datum - * new item to add to the running average - * @throws IllegalArgumentException - * if datum is { @link Double#NaN} - */ - def addDatum(datum: Double) - - /** - * @param datum - * item to remove to the running average - * @throws IllegalArgumentException - * if datum is { @link Double#NaN} - * @throws IllegalStateException - * if count is 0 - */ - def removeDatum(datum: Double) - - /** - * @param delta - * amount by which to change a datum in the running average - * @throws IllegalArgumentException - * if delta is { @link Double#NaN} - * @throws IllegalStateException - * if count is 0 - */ - def changeDatum(delta: Double) - - def getCount: Int - - def getAverage: Double - - /** - * @return a (possibly immutable) object whose average is the negative of this object's - */ - def inverse: RunningAverage -} - -/** - * - * Extends {@link RunningAverage} by adding standard deviation too. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev.java - */ -trait RunningAverageAndStdDev extends RunningAverage { - - /** @return standard deviation of data */ - def getStandardDeviation: Double - - /** - * @return a (possibly immutable) object whose average is the negative of this object's - */ - def inverse: RunningAverageAndStdDev -} - - -class InvertedRunningAverage(private val delegate: RunningAverage) extends RunningAverage { - - override def addDatum(datum: Double) { - throw new UnsupportedOperationException - } - - override def removeDatum(datum: Double) { - throw new UnsupportedOperationException - } - - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - override def getCount: Int = { - delegate.getCount - } - - override def getAverage: Double = { - -delegate.getAverage - } - - override def inverse: RunningAverage = { - delegate - } -} - - -/** - * - * A simple class that can keep track of a running average of a series of numbers. One can add to or remove - * from the series, as well as update a datum in the series. The class does not actually keep track of the - * series of values, just its running average, so it doesn't even matter if you remove/change a value that - * wasn't added. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverage.java - */ -class FullRunningAverage(private var count: Int = 0, - private var average: Double = Double.NaN ) extends RunningAverage { - - /** - * @param datum - * new item to add to the running average - */ - override def addDatum(datum: Double) { - count += 1 - if (count == 1) { - average = datum - } - else { - average = average * (count - 1) / count + datum / count - } - } - - /** - * @param datum - * item to remove from the running average - * @throws IllegalStateException - * if count is 0 - */ - override def removeDatum(datum: Double) { - if (count == 0) { - throw new IllegalStateException - } - count -= 1 - if (count == 0) { - average = Double.NaN - } - else { - average = average * (count + 1) / count - datum / count - } - } - - /** - * @param delta - * amount by which to change a datum in the running average - * @throws IllegalStateException - * if count is 0 - */ - override def changeDatum(delta: Double) { - if (count == 0) { - throw new IllegalStateException - } - average += delta / count - } - - override def getCount: Int = { - count - } - - override def getAverage: Double = { - average - } - - override def inverse: RunningAverage = { - new InvertedRunningAverage(this) - } - - override def toString: String = { - String.valueOf(average) - } -} - - -/** - * - * Extends {@link FullRunningAverage} to add a running standard deviation computation. 
- * Uses Welford's method, as described at http://www.johndcook.com/standard_deviation.html - * - * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev.java - */ -class FullRunningAverageAndStdDev(private var count: Int = 0, - private var average: Double = 0.0, - private var mk: Double = 0.0, - private var sk: Double = 0.0) extends FullRunningAverage with RunningAverageAndStdDev { - - var stdDev: Double = 0.0 - - recomputeStdDev - - def getMk: Double = { - mk - } - - def getSk: Double = { - sk - } - - override def getStandardDeviation: Double = { - stdDev - } - - override def addDatum(datum: Double) { - super.addDatum(datum) - val count: Int = getCount - if (count == 1) { - mk = datum - sk = 0.0 - } - else { - val oldmk: Double = mk - val diff: Double = datum - oldmk - mk += diff / count - sk += diff * (datum - mk) - } - recomputeStdDev - } - - override def removeDatum(datum: Double) { - val oldCount: Int = getCount - super.removeDatum(datum) - val oldmk: Double = mk - mk = (oldCount * oldmk - datum) / (oldCount - 1) - sk -= (datum - mk) * (datum - oldmk) - recomputeStdDev - } - - /** - * @throws UnsupportedOperationException - */ - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - private def recomputeStdDev { - val count: Int = getCount - stdDev = if (count > 1) Math.sqrt(sk / (count - 1)) else Double.NaN - } - - override def inverse: RunningAverageAndStdDev = { - new InvertedRunningAverageAndStdDev(this) - } - - override def toString: String = { - String.valueOf(String.valueOf(getAverage) + ',' + stdDev) - } - -} - - -/** - * - * @param delegate RunningAverageAndStdDev instance - * - * Ported from org.apache.mahout.cf.taste.impl.common.InvertedRunningAverageAndStdDev.java - */ -class InvertedRunningAverageAndStdDev(private val delegate: RunningAverageAndStdDev) extends RunningAverageAndStdDev { - - /** - * @throws UnsupportedOperationException - */ - override def addDatum(datum: Double) { - 
throw new UnsupportedOperationException - } - - /** - * @throws UnsupportedOperationException - */ - - override def removeDatum(datum: Double) { - throw new UnsupportedOperationException - } - - /** - * @throws UnsupportedOperationException - */ - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - override def getCount: Int = { - delegate.getCount - } - - override def getAverage: Double = { - -delegate.getAverage - } - - override def getStandardDeviation: Double = { - delegate.getStandardDeviation - } - - override def inverse: RunningAverageAndStdDev = { - delegate - } -} - - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala deleted file mode 100644 index d421fa1..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala +++ /dev/null @@ -1,459 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package org.apache.mahout.classifier.stats - -import java.util -import org.apache.commons.math3.stat.descriptive.moment.Mean // This is brought in by mahout-math -import org.apache.mahout.math.{DenseMatrix, Matrix} -import scala.collection.mutable -import scala.collection.JavaConversions._ - -/** - * - * Ported from org.apache.mahout.classifier.ConfusionMatrix.java - * - * The ConfusionMatrix Class stores the result of Classification of a Test Dataset. - * - * The fact of whether there is a default is not stored. A row of zeros is the only indicator that there is no default. - * - * See http://en.wikipedia.org/wiki/Confusion_matrix for background - * - * - * @param labels The labels to consider for classification - * @param defaultLabel default unknown label - */ -class ConfusionMatrix(private var labels: util.Collection[String] = null, - private var defaultLabel: String = "unknown") { - /** - * Matrix Constructor - */ -// def this(m: Matrix) { -// this() -// confusionMatrix = Array.ofDim[Int](m.numRows, m.numRows) -// setMatrix(m) -// } - - // val LOG: Logger = LoggerFactory.getLogger(classOf[ConfusionMatrix]) - - var confusionMatrix = Array.ofDim[Int](labels.size + 1, labels.size + 1) - - val labelMap = new mutable.HashMap[String,Integer]() - - var samples: Int = 0 - - var i: Integer = 0 - for (label <- labels) { - labelMap.put(label, i) - i+=1 - } - labelMap.put(defaultLabel, i) - - - def getConfusionMatrix: Array[Array[Int]] = confusionMatrix - - def getLabels = labelMap.keys.toList - - def numLabels: Int = labelMap.size - - def getAccuracy(label: String): Double = { - val labelId: Int = labelMap(label) - var labelTotal: Int = 0 - var correct: Int = 0 - for (i <- 0 until numLabels) { - labelTotal += confusionMatrix(labelId)(i) - if (i == labelId) { - correct += confusionMatrix(labelId)(i) - } - } - - 100.0 * correct / labelTotal - } - - def getAccuracy: Double = { - var total: Int = 0 - var correct: Int = 0 - for (i <- 0 until numLabels) { - for (j <- 0 
until numLabels) { - total += confusionMatrix(i)(j) - if (i == j) { - correct += confusionMatrix(i)(j) - } - } - } - - 100.0 * correct / total - } - - /** Sum of true positives and false negatives */ - private def getActualNumberOfTestExamplesForClass(label: String): Int = { - val labelId: Int = labelMap(label) - var sum: Int = 0 - for (i <- 0 until numLabels) { - sum += confusionMatrix(labelId)(i) - } - sum - } - - def getPrecision(label: String): Double = { - val labelId: Int = labelMap(label) - val truePositives: Int = confusionMatrix(labelId)(labelId) - var falsePositives: Int = 0 - - for (i <- 0 until numLabels) { - if (i != labelId) { - falsePositives += confusionMatrix(i)(labelId) - } - } - - if (truePositives + falsePositives == 0) { - 0 - } else { - truePositives.asInstanceOf[Double] / (truePositives + falsePositives) - } - } - - - def getWeightedPrecision: Double = { - val precisions: Array[Double] = new Array[Double](numLabels) - val weights: Array[Double] = new Array[Double](numLabels) - var index: Int = 0 - for (label <- labelMap.keys) { - precisions(index) = getPrecision(label) - weights(index) = getActualNumberOfTestExamplesForClass(label) - index += 1 - } - new Mean().evaluate(precisions, weights) - } - - def getRecall(label: String): Double = { - val labelId: Int = labelMap(label) - val truePositives: Int = confusionMatrix(labelId)(labelId) - var falseNegatives: Int = 0 - for (i <- 0 until numLabels) { - if (i != labelId) { - falseNegatives += confusionMatrix(labelId)(i) - } - } - - if (truePositives + falseNegatives == 0) { - 0 - } else { - truePositives.asInstanceOf[Double] / (truePositives + falseNegatives) - } - } - - def getWeightedRecall: Double = { - val recalls: Array[Double] = new Array[Double](numLabels) - val weights: Array[Double] = new Array[Double](numLabels) - var index: Int = 0 - for (label <- labelMap.keys) { - recalls(index) = getRecall(label) - weights(index) = getActualNumberOfTestExamplesForClass(label) - index += 1 - } - new 
Mean().evaluate(recalls, weights) - } - - def getF1score(label: String): Double = { - val precision: Double = getPrecision(label) - val recall: Double = getRecall(label) - if (precision + recall == 0) { - 0 - } else { - 2 * precision * recall / (precision + recall) - } - } - - def getWeightedF1score: Double = { - val f1Scores: Array[Double] = new Array[Double](numLabels) - val weights: Array[Double] = new Array[Double](numLabels) - var index: Int = 0 - for (label <- labelMap.keys) { - f1Scores(index) = getF1score(label) - weights(index) = getActualNumberOfTestExamplesForClass(label) - index += 1 - } - new Mean().evaluate(f1Scores, weights) - } - - def getReliability: Double = { - var count: Int = 0 - var accuracy: Double = 0 - for (label <- labelMap.keys) { - if (!(label == defaultLabel)) { - accuracy += getAccuracy(label) - } - count += 1 - } - accuracy / count - } - - /** - * Accuracy v.s. randomly classifying all samples. - * kappa() = (totalAccuracy() - randomAccuracy()) / (1 - randomAccuracy()) - * Cohen, Jacob. 1960. A coefficient of agreement for nominal scales. - * Educational And Psychological Measurement 20:37-46. 
- * - * Formula and variable names from: - * http://www.yale.edu/ceo/OEFS/Accuracy.pdf - * - * @return double - */ - def getKappa: Double = { - var a: Double = 0.0 - var b: Double = 0.0 - for (i <- confusionMatrix.indices) { - a += confusionMatrix(i)(i) - var br: Int = 0 - for (j <- confusionMatrix.indices) { - br += confusionMatrix(i)(j) - } - var bc: Int = 0 - //TODO: verify this as an iterator - for (vec <- confusionMatrix) { - bc += vec(i) - } - b += br * bc - } - (samples * a - b) / (samples * samples - b) - } - - def getCorrect(label: String): Int = { - val labelId: Int = labelMap(label) - confusionMatrix(labelId)(labelId) - } - - def getTotal(label: String): Int = { - val labelId: Int = labelMap(label) - var labelTotal: Int = 0 - for (i <- 0 until numLabels) { - labelTotal += confusionMatrix(labelId)(i) - } - labelTotal - } - - /** - * Standard deviation of normalized producer accuracy - * Not a standard score - * @return double - */ - def getNormalizedStats: RunningAverageAndStdDev = { - val summer = new FullRunningAverageAndStdDev() - for (d <- confusionMatrix.indices) { - var total: Double = 0.0 - for (j <- confusionMatrix.indices) { - total += confusionMatrix(d)(j) - } - summer.addDatum(confusionMatrix(d)(d) / (total + 0.000001)) - } - summer - } - - def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Unit = { - samples += 1 - incrementCount(correctLabel, classifiedResult.getLabel) - } - - def addInstance(correctLabel: String, classifiedLabel: String): Unit = { - samples += 1 - incrementCount(correctLabel, classifiedLabel) - } - - def getCount(correctLabel: String, classifiedLabel: String): Int = { - if (!labelMap.containsKey(correctLabel)) { - // LOG.warn("Label {} did not appear in the training examples", correctLabel) - return 0 - } - assert(labelMap.containsKey(classifiedLabel), "Label not found: " + classifiedLabel) - val correctId: Int = labelMap(correctLabel) - val classifiedId: Int = labelMap(classifiedLabel) - 
confusionMatrix(correctId)(classifiedId) - } - - def putCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = { - if (!labelMap.containsKey(correctLabel)) { - // LOG.warn("Label {} did not appear in the training examples", correctLabel) - return - } - assert(labelMap.containsKey(classifiedLabel), "Label not found: " + classifiedLabel) - val correctId: Int = labelMap(correctLabel) - val classifiedId: Int = labelMap(classifiedLabel) - if (confusionMatrix(correctId)(classifiedId) == 0.0 && count != 0) { - samples += 1 - } - confusionMatrix(correctId)(classifiedId) = count - } - - def incrementCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = { - putCount(correctLabel, classifiedLabel, count + getCount(correctLabel, classifiedLabel)) - } - - def incrementCount(correctLabel: String, classifiedLabel: String): Unit = { - incrementCount(correctLabel, classifiedLabel, 1) - } - - def getDefaultLabel: String = { - defaultLabel - } - - def merge(b: ConfusionMatrix): ConfusionMatrix = { - assert(labelMap.size == b.getLabels.size, "The label sizes do not match") - for (correctLabel <- this.labelMap.keys) { - for (classifiedLabel <- this.labelMap.keys) { - incrementCount(correctLabel, classifiedLabel, b.getCount(correctLabel, classifiedLabel)) - } - } - this - } - - def getMatrix: Matrix = { - val length: Int = confusionMatrix.length - val m: Matrix = new DenseMatrix(length, length) - - val labels: java.util.HashMap[String, Integer] = new java.util.HashMap() - - for (r <- 0 until length) { - for (c <- 0 until length) { - m.set(r, c, confusionMatrix(r)(c)) - } - } - - for (entry <- labelMap.entrySet) { - labels.put(entry.getKey, entry.getValue) - } - m.setRowLabelBindings(labels) - m.setColumnLabelBindings(labels) - - m - } - - def setMatrix(m: Matrix) : Unit = { - val length: Int = confusionMatrix.length - if (m.numRows != m.numCols) { - throw new IllegalArgumentException("ConfusionMatrix: matrix(" + m.numRows + ',' + m.numCols + ") must 
be square") - } - - for (r <- 0 until length) { - for (c <- 0 until length) { - confusionMatrix(r)(c) = Math.round(m.get(r, c)).toInt - } - } - - var labels = m.getRowLabelBindings - if (labels == null) { - labels = m.getColumnLabelBindings - } - - if (labels != null) { - val sorted: Array[String] = sortLabels(labels) - verifyLabels(length, sorted) - labelMap.clear - for (i <- 0 until length) { - labelMap.put(sorted(i), i) - } - } - } - - def verifyLabels(length: Int, sorted: Array[String]): Unit = { - assert(sorted.length == length, "One label, one row") - for (i <- 0 until length) { - if (sorted(i) == null) { - assert(assertion = false, "One label, one row") - } - } - } - - def sortLabels(labels: java.util.Map[String, Integer]): Array[String] = { - val sorted: Array[String] = new Array[String](labels.size) - for (entry <- labels.entrySet) { - sorted(entry.getValue) = entry.getKey - } - - sorted - } - - /** - * This is overloaded. toString() is not a formatted report you print for a manager :) - * Assume that if there are no default assignments, the default feature was not used - */ - override def toString: String = { - - val returnString: StringBuilder = new StringBuilder(200) - - returnString.append("=======================================================").append('\n') - returnString.append("Confusion Matrix\n") - returnString.append("-------------------------------------------------------").append('\n') - - val unclassified: Int = getTotal(defaultLabel) - - for (entry <- this.labelMap.entrySet) { - if (!((entry.getKey == defaultLabel) && unclassified == 0)) { - returnString.append(getSmallLabel(entry.getValue) + " ").append('\t') - } - } - - returnString.append("<--Classified as").append('\n') - - for (entry <- this.labelMap.entrySet) { - if (!((entry.getKey == defaultLabel) && unclassified == 0)) { - val correctLabel: String = entry.getKey - var labelTotal: Int = 0 - - for (classifiedLabel <- this.labelMap.keySet) { - if (!((classifiedLabel == defaultLabel) 
&& unclassified == 0)) { - returnString.append(Integer.toString(getCount(correctLabel, classifiedLabel)) + " ") - .append('\t') - labelTotal += getCount(correctLabel, classifiedLabel) - } - } - returnString.append(" | ").append(String.valueOf(labelTotal) + " ") - .append('\t') - .append(getSmallLabel(entry.getValue) + " ") - .append(" = ") - .append(correctLabel) - .append('\n') - } - } - - if (unclassified > 0) { - returnString.append("Default Category: ") - .append(defaultLabel) - .append(": ") - .append(unclassified) - .append('\n') - } - returnString.append('\n') - - returnString.toString() - } - - - def getSmallLabel(i: Int): String = { - var value: Int = i - val returnString: StringBuilder = new StringBuilder - do { - val n: Int = value % 26 - returnString.insert(0, ('a' + n).asInstanceOf[Char]) - value /= 26 - } while (value > 0) - - returnString.toString() - } - - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala b/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala deleted file mode 100644 index 534d37c..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/common/io/GenericMatrixKryoSerializer.scala +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.common.io - -import com.esotericsoftware.kryo.io.{Input, Output} -import com.esotericsoftware.kryo.{Kryo, Serializer} -import org.apache.log4j.Logger -import org.apache.mahout.logging._ -import org.apache.mahout.math._ -import org.apache.mahout.math.flavor.TraversingStructureEnum -import org.apache.mahout.math.scalabindings.RLikeOps._ -import org.apache.mahout.math.scalabindings._ - -import scala.collection.JavaConversions._ - -object GenericMatrixKryoSerializer { - - private implicit final val log = Logger.getLogger(classOf[GenericMatrixKryoSerializer]) - -} - -/** Serializes Sparse or Dense in-core generic matrix (row-wise or column-wise backed) */ -class GenericMatrixKryoSerializer extends Serializer[Matrix] { - - import GenericMatrixKryoSerializer._ - - override def write(kryo: Kryo, output: Output, mx: Matrix): Unit = { - - debug(s"Writing mx of type ${mx.getClass.getName}") - - val structure = mx.getFlavor.getStructure - - // Write structure bit - output.writeInt(structure.ordinal(), true) - - // Write geometry - output.writeInt(mx.nrow, true) - output.writeInt(mx.ncol, true) - - // Write in most efficient traversal order (using backing vectors perhaps) - structure match { - case TraversingStructureEnum.COLWISE => writeRowWise(kryo, output, mx.t) - case TraversingStructureEnum.SPARSECOLWISE => writeSparseRowWise(kryo, output, mx.t) - case TraversingStructureEnum.SPARSEROWWISE => writeSparseRowWise(kryo, output, mx) - case TraversingStructureEnum.VECTORBACKED => writeVectorBacked(kryo, output, mx) - case _ => 
writeRowWise(kryo, output, mx) - } - - } - - private def writeVectorBacked(kryo: Kryo, output: Output, mx: Matrix) { - - require(mx != null) - - // At this point we are just doing some vector-backed classes individually. TODO: create - // api to obtain vector-backed matrix data. - kryo.writeClass(output, mx.getClass) - mx match { - case mxD: DiagonalMatrix => kryo.writeObject(output, mxD.diagv) - case mxS: DenseSymmetricMatrix => kryo.writeObject(output, dvec(mxS.getData)) - case mxT: UpperTriangular => kryo.writeObject(output, dvec(mxT.getData)) - case _ => throw new IllegalArgumentException(s"Unsupported matrix type:${mx.getClass.getName}") - } - } - - private def readVectorBacked(kryo: Kryo, input: Input, nrow: Int, ncol: Int) = { - - // We require vector-backed matrices to have vector-parameterized constructor to construct. - val clazz = kryo.readClass(input).getType - - debug(s"Deserializing vector-backed mx of type ${clazz.getName}.") - - clazz.getConstructor(classOf[Vector]).newInstance(kryo.readObject(input, classOf[Vector])).asInstanceOf[Matrix] - } - - private def writeRowWise(kryo: Kryo, output: Output, mx: Matrix): Unit = { - for (row <- mx) kryo.writeObject(output, row) - } - - private def readRows(kryo: Kryo, input: Input, nrow: Int) = { - Array.tabulate(nrow) { _ => kryo.readObject(input, classOf[Vector])} - } - - private def readSparseRows(kryo: Kryo, input: Input) = { - - // Number of slices - val nslices = input.readInt(true) - - Array.tabulate(nslices) { _ => - input.readInt(true) -> kryo.readObject(input, classOf[Vector]) - } - } - - private def writeSparseRowWise(kryo: Kryo, output: Output, mx: Matrix): Unit = { - - val nslices = mx.numSlices() - - output.writeInt(nslices, true) - - var actualNSlices = 0 - for (row <- mx.iterateNonEmpty()) { - output.writeInt(row.index(), true) - kryo.writeObject(output, row.vector()) - actualNSlices += 1 - } - - require(nslices == actualNSlices, "Number of slices reported by Matrix.numSlices() was different 
from actual " + - "slice iterator size.") - } - - override def read(kryo: Kryo, input: Input, mxClass: Class[Matrix]): Matrix = { - - // Read structure hint - val structure = TraversingStructureEnum.values()(input.readInt(true)) - - // Read geometry - val nrow = input.readInt(true) - val ncol = input.readInt(true) - - debug(s"read matrix geometry: $nrow x $ncol.") - - structure match { - - // Sparse or dense column wise - case TraversingStructureEnum.COLWISE => - val cols = readRows(kryo, input, ncol) - - if (!cols.isEmpty && cols.head.isDense) - dense(cols).t - else { - debug("Deserializing as SparseRowMatrix.t (COLWISE).") - new SparseRowMatrix(ncol, nrow, cols, true, false).t - } - - // transposed SparseMatrix case - case TraversingStructureEnum.SPARSECOLWISE => - val cols = readSparseRows(kryo, input) - val javamap = new java.util.HashMap[Integer, Vector]((cols.size << 1) + 1) - cols.foreach { case (idx, vec) => javamap.put(idx, vec)} - - debug("Deserializing as SparseMatrix.t (SPARSECOLWISE).") - new SparseMatrix(ncol, nrow, javamap, true).t - - // Sparse Row-wise -- this will be created as a SparseMatrix. - case TraversingStructureEnum.SPARSEROWWISE => - val rows = readSparseRows(kryo, input) - val javamap = new java.util.HashMap[Integer, Vector]((rows.size << 1) + 1) - rows.foreach { case (idx, vec) => javamap.put(idx, vec)} - - debug("Deserializing as SparseMatrix (SPARSEROWWISE).") - new SparseMatrix(nrow, ncol, javamap, true) - case TraversingStructureEnum.VECTORBACKED => - - debug("Deserializing vector-backed...") - readVectorBacked(kryo, input, nrow, ncol) - - // By default, read row-wise. - case _ => - val cols = readRows(kryo, input, nrow) - // this still copies a lot of stuff... 
- if (!cols.isEmpty && cols.head.isDense) { - - debug("Deserializing as DenseMatrix.") - dense(cols) - } else { - - debug("Deserializing as SparseRowMatrix(default).") - new SparseRowMatrix(nrow, ncol, cols, true, false) - } - } - - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala b/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala deleted file mode 100644 index 3cc537c..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/common/io/VectorKryoSerializer.scala +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.common.io - -import com.esotericsoftware.kryo.io.{Input, Output} -import com.esotericsoftware.kryo.{Kryo, Serializer} -import org.apache.mahout.logging._ -import org.apache.mahout.math._ -import org.apache.mahout.math.scalabindings.RLikeOps._ - -import scala.collection.JavaConversions._ - - -object VectorKryoSerializer { - - final val FLAG_DENSE: Int = 0x01 - final val FLAG_SEQUENTIAL: Int = 0x02 - final val FLAG_NAMED: Int = 0x04 - final val FLAG_LAX_PRECISION: Int = 0x08 - - private final implicit val log = getLog(classOf[VectorKryoSerializer]) - -} - -class VectorKryoSerializer(val laxPrecision: Boolean = false) extends Serializer[Vector] { - - import VectorKryoSerializer._ - - override def write(kryo: Kryo, output: Output, vector: Vector): Unit = { - - require(vector != null) - - trace(s"Serializing vector of ${vector.getClass.getName} class.") - - // Write length - val len = vector.length - output.writeInt(len, true) - - // Interrogate vec properties - val dense = vector.isDense - val sequential = vector.isSequentialAccess - val named = vector.isInstanceOf[NamedVector] - - var flag = 0 - - if (dense) { - flag |= FLAG_DENSE - } else if (sequential) { - flag |= FLAG_SEQUENTIAL - } - - if (vector.isInstanceOf[NamedVector]) { - flag |= FLAG_NAMED - } - - if (laxPrecision) flag |= FLAG_LAX_PRECISION - - // Write flags - output.writeByte(flag) - - // Write name if needed - if (named) output.writeString(vector.asInstanceOf[NamedVector].getName) - - dense match { - - // Dense vector. - case true => - - laxPrecision match { - case true => - for (i <- 0 until vector.length) output.writeFloat(vector(i).toFloat) - case _ => - for (i <- 0 until vector.length) output.writeDouble(vector(i)) - } - case _ => - - // Turns out getNumNonZeroElements must check every element if it is indeed non-zero. The - // iterateNonZeros() on the other hand doesn't do that, so that's all inconsistent right - // now. so we'll just auto-terminate. 
- val iter = vector.nonZeroes.toIterator.filter(_.get() != 0.0) - - sequential match { - - // Delta encoding - case true => - - var idx = 0 - laxPrecision match { - case true => - while (iter.hasNext) { - val el = iter.next() - output.writeFloat(el.toFloat) - output.writeInt(el.index() - idx, true) - idx = el.index - } - // Terminate delta encoding. - output.writeFloat(0.0.toFloat) - case _ => - while (iter.hasNext) { - val el = iter.next() - output.writeDouble(el.get()) - output.writeInt(el.index() - idx, true) - idx = el.index - } - // Terminate delta encoding. - output.writeDouble(0.0) - } - - // Random access. - case _ => - - laxPrecision match { - - case true => - iter.foreach { el => - output.writeFloat(el.get().toFloat) - output.writeInt(el.index(), true) - } - // Terminate random access with 0.0 value. - output.writeFloat(0.0.toFloat) - case _ => - iter.foreach { el => - output.writeDouble(el.get()) - output.writeInt(el.index(), true) - } - // Terminate random access with 0.0 value. - output.writeDouble(0.0) - } - - } - - } - } - - override def read(kryo: Kryo, input: Input, vecClass: Class[Vector]): Vector = { - - val len = input.readInt(true) - val flags = input.readByte().toInt - val name = if ((flags & FLAG_NAMED) != 0) Some(input.readString()) else None - - val vec: Vector = flags match { - - // Dense - case _: Int if (flags & FLAG_DENSE) != 0 => - - trace(s"Deserializing dense vector.") - - if ((flags & FLAG_LAX_PRECISION) != 0) { - new DenseVector(len) := { _ => input.readFloat()} - } else { - new DenseVector(len) := { _ => input.readDouble()} - } - - // Sparse case. - case _ => - - flags match { - - // Sequential. 
- case _: Int if (flags & FLAG_SEQUENTIAL) != 0 => - - trace("Deserializing as sequential sparse vector.") - - val v = new SequentialAccessSparseVector(len) - var idx = 0 - var stop = false - - if ((flags & FLAG_LAX_PRECISION) != 0) { - - while (!stop) { - val value = input.readFloat() - if (value == 0.0) { - stop = true - } else { - idx += input.readInt(true) - v(idx) = value - } - } - } else { - while (!stop) { - val value = input.readDouble() - if (value == 0.0) { - stop = true - } else { - idx += input.readInt(true) - v(idx) = value - } - } - } - v - - // Random access - case _ => - - trace("Deserializing as random access vector.") - - // Read pairs until we see 0.0 value. Prone to corruption attacks obviously. - val v = new RandomAccessSparseVector(len) - var stop = false - if ((flags & FLAG_LAX_PRECISION) != 0) { - while (! stop ) { - val value = input.readFloat() - if ( value == 0.0 ) { - stop = true - } else { - val idx = input.readInt(true) - v(idx) = value - } - } - } else { - while (! stop ) { - val value = input.readDouble() - if (value == 0.0) { - stop = true - } else { - val idx = input.readInt(true) - v(idx) = value - } - } - } - v - } - } - - name.map{name => - - trace(s"Recovering named vector's name $name.") - - new NamedVector(vec, name) - } - .getOrElse(vec) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala b/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala deleted file mode 100644 index 32515f1..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.drivers - -import org.apache.mahout.math.drm.DistributedContext - -/** Extended by a platform specific version of this class to create a Mahout CLI driver. */ -abstract class MahoutDriver { - - implicit protected var mc: DistributedContext = _ - implicit protected var parser: MahoutOptionParser = _ - - var _useExistingContext: Boolean = false // used in the test suite to reuse one context per suite - - /** must be overriden to setup the DistributedContext mc*/ - protected def start() : Unit - - /** Override (optionally) for special cleanup */ - protected def stop(): Unit = { - if (!_useExistingContext) mc.close - } - - /** This is where you do the work, call start first, then before exiting call stop */ - protected def process(): Unit - - /** Parse command line and call process */ - def main(args: Array[String]): Unit - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala b/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala deleted file mode 100644 index d3723a2..0000000 --- 
a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutOptionParser.scala +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.drivers - -import scopt.OptionParser - -import scala.collection.immutable - -/** - * Defines oft-repeated options and their parsing. Provides the option groups and parsing helper methods to - * keep both standarized. - * @param programName Name displayed in help message, the name by which the driver is invoked. - * @note options are engine neutral by convention. See the engine specific extending class for - * to add Spark or other engine options. - */ -class MahoutOptionParser(programName: String) extends OptionParser[Map[String, Any]](programName: String) { - - // build options from some stardard CLI param groups - // Note: always put the driver specific options at the last so they can override any previous options! 
- var opts = Map.empty[String, Any] - - override def showUsageOnError = true - - def parseIOOptions(numInputs: Int = 1) = { - opts = opts ++ MahoutOptionParser.FileIOOptions - note("Input, output options") - opt[String]('i', "input") required() action { (x, options) => - options + ("input" -> x) - } text ("Input path, may be a filename, directory name, or comma delimited list of HDFS supported URIs" + - " (required)") - - if (numInputs == 2) { - opt[String]("input2") abbr ("i2") action { (x, options) => - options + ("input2" -> x) - } text ("Secondary input path for cross-similarity calculation, same restrictions as \"--input\" " + - "(optional). Default: empty.") - } - - opt[String]('o', "output") required() action { (x, options) => - if (x.endsWith("/")) { - options + ("output" -> x) - } else { - options + ("output" -> (x + "/")) - } - } text ("Path for output directory, any HDFS supported URI (required)") - - } - - def parseGenericOptions() = { - opts = opts ++ MahoutOptionParser.GenericOptions - opt[Int]("randomSeed") abbr ("rs") action { (x, options) => - options + ("randomSeed" -> x) - } validate { x => - if (x > 0) success else failure("Option --randomSeed must be > 0") - } - - //output both input IndexedDatasets - opt[Unit]("writeAllDatasets") hidden() action { (_, options) => - options + ("writeAllDatasets" -> true) - }//Hidden option, though a user might want this. - } - - def parseElementInputSchemaOptions() = { - //Input text file schema--not driver specific but input data specific, elements input, - // not rows of IndexedDatasets - opts = opts ++ MahoutOptionParser.TextDelimitedElementsOptions - note("\nInput text file schema options:") - opt[String]("inDelim") abbr ("id") text ("Input delimiter character (optional). 
Default: \"[ ,\\t]\"") action { - (x, options) => - options + ("inDelim" -> x) - } - - opt[String]("filter1") abbr ("f1") action { (x, options) => - options + ("filter1" -> x) - } text ("String (or regex) whose presence indicates a datum for the primary item set (optional). " + - "Default: no filter, all data is used") - - opt[String]("filter2") abbr ("f2") action { (x, options) => - options + ("filter2" -> x) - } text ("String (or regex) whose presence indicates a datum for the secondary item set (optional). " + - "If not present no secondary dataset is collected") - - opt[Int]("rowIDColumn") abbr ("rc") action { (x, options) => - options + ("rowIDColumn" -> x) - } text ("Column number (0 based Int) containing the row ID string (optional). Default: 0") validate { - x => - if (x >= 0) success else failure("Option --rowIDColNum must be >= 0") - } - - opt[Int]("itemIDColumn") abbr ("ic") action { (x, options) => - options + ("itemIDColumn" -> x) - } text ("Column number (0 based Int) containing the item ID string (optional). Default: 1") validate { - x => - if (x >= 0) success else failure("Option --itemIDColNum must be >= 0") - } - - opt[Int]("filterColumn") abbr ("fc") action { (x, options) => - options + ("filterColumn" -> x) - } text ("Column number (0 based Int) containing the filter string (optional). 
Default: -1 for no " + - "filter") validate { x => - if (x >= -1) success else failure("Option --filterColNum must be >= -1") - } - - note("\nUsing all defaults the input is expected of the form: \"userIDitemId\" or" + - " \"userIDitemIDany-text...\" and all rows will be used") - - //check for column consistency - checkConfig { options: Map[String, Any] => - if (options("filterColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int] - || options("filterColumn").asInstanceOf[Int] == options("rowIDColumn").asInstanceOf[Int] - || options("rowIDColumn").asInstanceOf[Int] == options("itemIDColumn").asInstanceOf[Int]) - failure("The row, item, and filter positions must be unique.") else success - } - - //check for filter consistency - checkConfig { options: Map[String, Any] => - if (options("filter1").asInstanceOf[String] != null.asInstanceOf[String] - && options("filter2").asInstanceOf[String] != null.asInstanceOf[String] - && options("filter1").asInstanceOf[String] == options("filter2").asInstanceOf[String]) - failure ("If using filters they must be unique.") else success - } - - } - - def parseFileDiscoveryOptions() = { - //File finding strategy--not driver specific - opts = opts ++ MahoutOptionParser.FileDiscoveryOptions - note("\nFile discovery options:") - opt[Unit]('r', "recursive") action { (_, options) => - options + ("recursive" -> true) - } text ("Searched the -i path recursively for files that match --filenamePattern (optional), Default: false") - - opt[String]("filenamePattern") abbr ("fp") action { (x, options) => - options + ("filenamePattern" -> x) - } text ("Regex to match in determining input files (optional). 
Default: filename in the --input option " + - "or \"^part-.*\" if --input is a directory") - - } - - def parseIndexedDatasetFormatOptions(notice: String = "\nOutput text file schema options:") = { - opts = opts ++ MahoutOptionParser.TextDelimitedIndexedDatasetOptions - note(notice) - opt[String]("rowKeyDelim") abbr ("rd") action { (x, options) => - options + ("rowKeyDelim" -> x) - } text ("Separates the rowID key from the vector values list (optional). Default: \"\\t\"") - - opt[String]("columnIdStrengthDelim") abbr ("cd") action { (x, options) => - options + ("columnIdStrengthDelim" -> x) - } text ("Separates column IDs from their values in the vector values list (optional). Default: \":\"") - - opt[String]("elementDelim") abbr ("td") action { (x, options) => - options + ("elementDelim" -> x) - } text ("Separates vector element values in the values list (optional). Default: \" \"") - - opt[Unit]("omitStrength") abbr ("os") action { (_, options) => - options + ("omitStrength" -> true) - } text ("Do not write the strength to the output files (optional), Default: false.") - note("This option is used to output indexable data for creating a search engine recommender.") - - note("\nDefault delimiters will produce output of the form: " + - "\"itemID1itemID2:value2itemID10:value10...\"") - } - -} - -/** - * Companion object defines default option groups for reference in any driver that needs them. 
- * @note not all options are platform neutral so other platforms can add default options here if desired - */ -object MahoutOptionParser { - - // set up the various default option groups - /** Defaults for every driver: `randomSeed` is the current time in milliseconds narrowed to an Int (evaluated when this value is initialized) and `writeAllDatasets` is false. */ - final val GenericOptions = immutable.HashMap[String, Any]( - "randomSeed" -> System.currentTimeMillis().toInt, - "writeAllDatasets" -> false) - - /** Spark defaults: `master` is `local`, `sparkExecutorMem` is the empty string, and `appName` is a placeholder that is meant to be replaced. */ - final val SparkOptions = immutable.HashMap[String, Any]( - "master" -> "local", - "sparkExecutorMem" -> "", - "appName" -> "Generic Spark App, Change this.") - - /** File I/O defaults: `input`, `input2` and `output` all start as a null String. */ - final val FileIOOptions = immutable.HashMap[String, Any]( - "input" -> null.asInstanceOf[String], - "input2" -> null.asInstanceOf[String], - "output" -> null.asInstanceOf[String]) - - /** File discovery defaults: `recursive` is false and `filenamePattern` is the regex `^part-.*`. */ - final val FileDiscoveryOptions = immutable.HashMap[String, Any]( - "recursive" -> false, - "filenamePattern" -> "^part-.*") - - /** Defaults for delimited element input: row ID in column 0, item ID in column 1, `filterColumn` -1, both filters null, and `inDelim` a character class matching comma, tab or space. */ - final val TextDelimitedElementsOptions = immutable.HashMap[String, Any]( - "rowIDColumn" -> 0, - "itemIDColumn" -> 1, - "filterColumn" -> -1, - "filter1" -> null.asInstanceOf[String], - "filter2" -> null.asInstanceOf[String], - "inDelim" -> "[,\t ]") - - /** Defaults for delimited indexed-dataset text: tab as `rowKeyDelim`, `:` as `columnIdStrengthDelim`, a single space as `elementDelim`, and `omitStrength` false. */ - final val TextDelimitedIndexedDatasetOptions = immutable.HashMap[String, Any]( - "rowKeyDelim" -> "\t", - "columnIdStrengthDelim" -> ":", - "elementDelim" -> " ", - "omitStrength" -> false) -} - - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/logging/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/logging/package.scala b/math-scala/src/main/scala/org/apache/mahout/logging/package.scala deleted file mode 100644 index 15aa909..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/logging/package.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout - -import org.apache.log4j.{Level, Priority, Logger} - -package object logging { - - /** Evaluates `expr` and returns it in `Some` only when debug is enabled on `log`; otherwise returns `None` without evaluating `expr`. */ - def debugDo[T](expr: => T)(implicit log: Logger): Option[T] = { - if (log.isDebugEnabled) Some(expr) - else None - } - - /** Evaluates `expr` and returns it in `Some` only when trace is enabled on `log`; otherwise returns `None` without evaluating `expr`. */ - def traceDo[T](expr: => T)(implicit log: Logger): Option[T] = { - if (log.isTraceEnabled) Some(expr) else None - } - - /** Shorter, and lazy, versions of logging methods: `msg` is passed by name and is only evaluated when the level is enabled. Just declare log implicit. */ - def debug(msg: => AnyRef)(implicit log: Logger) { if (log.isDebugEnabled) log.debug(msg) } - - def debug(msg: => AnyRef, t: Throwable)(implicit log: Logger) { if (log.isDebugEnabled()) log.debug(msg, t) } - - /** Shorter, and lazy, versions of logging methods. Just declare log implicit. 
*/ - def trace(msg: => AnyRef)(implicit log: Logger) { if (log.isTraceEnabled) log.trace(msg) } - - def trace(msg: => AnyRef, t: Throwable)(implicit log: Logger) { if (log.isTraceEnabled()) log.trace(msg, t) } - - /** Logs `msg` at INFO level; `msg` is evaluated only if INFO is enabled. */ - def info(msg: => AnyRef)(implicit log: Logger) { if (log.isInfoEnabled) log.info(msg)} - - /** Logs `msg` together with `t` at INFO level; `msg` is evaluated only if INFO is enabled. */ - def info(msg: => AnyRef, t:Throwable)(implicit log: Logger) { if (log.isInfoEnabled) log.info(msg,t)} - - /** Logs `msg` at WARN level; `msg` is evaluated only if WARN is enabled. */ - def warn(msg: => AnyRef)(implicit log: Logger) { if (log.isEnabledFor(Level.WARN)) log.warn(msg) } - - /** Logs `msg` together with `t` at WARN level. Goes to `log.warn`, not to `error`, so the record keeps its WARN level. */ - def warn(msg: => AnyRef, t: Throwable)(implicit log: Logger) { if (log.isEnabledFor(Level.WARN)) log.warn(msg, t) } - - /** Logs `msg` at ERROR level via `log.error`; `msg` is evaluated only if ERROR is enabled. */ - def error(msg: => AnyRef)(implicit log: Logger) { if (log.isEnabledFor(Level.ERROR)) log.error(msg) } - - /** Logs `msg` together with `t` at ERROR level. Calls `log.error` directly: calling `error(msg, t)` here would resolve to this same overload and recurse without end. */ - def error(msg: => AnyRef, t: Throwable)(implicit log: Logger) { if (log.isEnabledFor(Level.ERROR)) log.error(msg, t) } - - /** Logs `msg` at FATAL level; `msg` is evaluated only if FATAL is enabled. */ - def fatal(msg: => AnyRef)(implicit log: Logger) { if (log.isEnabledFor(Level.FATAL)) log.fatal(msg) } - - /** Logs `msg` together with `t` at FATAL level; `msg` is evaluated only if FATAL is enabled. */ - def fatal(msg: => AnyRef, t: Throwable)(implicit log: Logger) { if (log.isEnabledFor(Level.FATAL)) log.fatal(msg, t) } - - /** Returns the log4j logger registered under `name`. */ - def getLog(name: String): Logger = Logger.getLogger(name) - - /** Returns the log4j logger for `clazz`. */ - def getLog(clazz: Class[_]): Logger = Logger.getLogger(clazz) - - /** Logger named `org.apache.mahout`. */ - def mahoutLog :Logger = getLog("org.apache.mahout") - - /** Sets the level of the implicit `log` to `l`. */ - def setLogLevel(l:Level)(implicit log:Logger) = { - log.setLevel(l) - } - - /** Sets the additivity flag of the implicit `log` to `a`. */ - def setAdditivity(a:Boolean)(implicit log:Logger) = log.setAdditivity(a) - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala deleted file mode 100644 index 244cefc..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Fitter.scala +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Licensed to the Apache Software Foundation 
(ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.mahout.math.algorithms - -/** Root trait of the fitter hierarchy; it declares no members. [[SupervisedFitter]] and [[UnsupervisedFitter]] extend it and each declare their own `fit` signature. */ -trait Fitter { - - // All models must have a fit method, but the signatures differ, so none is declared here. - // Leaving this as a placeholder in case we decide there are some things all Models must have in common. - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala deleted file mode 100644 index 0fbe8ac..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/Model.scala +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.mahout.math.algorithms - -/** Base trait for models. It is `Serializable` and carries one mutable field, `summary`. */ -trait Model extends Serializable { - - /** Free-form text describing the model; starts as the empty string and is reassignable. NOTE(review): nothing in this trait says who fills it in -- presumably the fitter; confirm. */ - var summary: String = "" - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala deleted file mode 100644 index bf85dee..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedFitter.scala +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.math.drm.DrmLike - -/** - * A [[Fitter]] whose `fit` takes two DRMs and returns a model. - * - * @tparam K key type shared by both input DRMs and by the resulting model - * @tparam M concrete model type produced by `fit`; must be a `SupervisedModel[K]` - */ -trait SupervisedFitter[K, M <: SupervisedModel[K]] extends Fitter { - - /** - * Fits a model of type `M`. - * - * @param drmX first input DRM (presumably the features/predictors -- confirm against implementations) - * @param drmTarget second input DRM with the same key type (presumably the target values -- confirm) - * @param hyperparameters zero or more `(Symbol, Any)` pairs; the accepted names and value types are defined by each implementation - * @return the model produced by the implementation - */ - def fit(drmX : DrmLike[K], - drmTarget: DrmLike[K], - hyperparameters: (Symbol, Any)*): M -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala deleted file mode 100644 index 57c20e7..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/SupervisedModel.scala +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.mahout.math.algorithms - -import scala.collection.mutable - -/** - * A [[Model]] that additionally carries a mutable map of test results. - * - * @tparam K key type; not used by any member declared in this trait - */ -trait SupervisedModel[K] extends Model { - /** Mutable map from a `Symbol` to an arbitrary value; starts empty. NOTE(review): which keys are written, and by whom, is not visible here -- confirm against the test/evaluation code. */ - var testResults: mutable.Map[Symbol, Any] = mutable.Map[Symbol, Any]() -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala deleted file mode 100644 index 5c191d1..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedFitter.scala +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.math.drm.DrmLike - -/** A [[Fitter]] whose `fit` takes a single input DRM and returns an [[UnsupervisedModel]]. */ -trait UnsupervisedFitter extends Fitter { - - /** - * Fits a model from one input DRM. - * - * @tparam K key type of the input DRM; chosen per call rather than per fitter - * @param input the input DRM - * @param hyperparameters zero or more `(Symbol, Any)` pairs; the accepted names and value types are defined by each implementation - * @return the model produced by the implementation - */ - def fit[K](input: DrmLike[K], - hyperparameters: (Symbol, Any)*): UnsupervisedModel -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala b/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala deleted file mode 100644 index f8ff341..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/algorithms/UnsupervisedModel.scala +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.mahout.math.algorithms - -/** A [[Model]] with no additional members; it is the declared return type of `UnsupervisedFitter.fit`. */ -trait UnsupervisedModel extends Model { - -}