Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 6667A200CB5 for ; Tue, 27 Jun 2017 18:14:36 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 64FE7160C03; Tue, 27 Jun 2017 16:14:36 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 00470160BFD for ; Tue, 27 Jun 2017 18:14:33 +0200 (CEST) Received: (qmail 87299 invoked by uid 500); 27 Jun 2017 16:14:30 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 85207 invoked by uid 99); 27 Jun 2017 16:14:29 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 27 Jun 2017 16:14:29 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 5CD94F2139; Tue, 27 Jun 2017 16:14:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: vanstee@apache.org To: commits@mahout.apache.org Date: Tue, 27 Jun 2017 16:14:38 -0000 Message-Id: <6c441a6b68b045a896d11bab2255f105@git.apache.org> In-Reply-To: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org> References: <7a54dd6c14144c2ea76887793d2dc849@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [13/52] [partial] mahout git commit: removed all files except for website directory archived-at: Tue, 27 Jun 2017 16:14:36 -0000 http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala 
---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala deleted file mode 100644 index ca3573f..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.Vector -import org.apache.mahout.math.function.Functions -import RLikeOps._ - -/** - * R-like operators. - * - * For now, all element-wise operators are declared private to math package - * since we are still discussing what is the best approach to have to replace - * Matlab syntax for elementwise '.*' since it is not directly available for - * Scala DSL. 
- * - * @param _v - */ -class MatlabLikeVectorOps(_v: Vector) extends VectorOps(_v) { - - /** Elementwise *= */ - private[math] def *@=(that: Vector) = v.assign(that, Functions.MULT) - - /** Elementwise /= */ - private[math] def /@=(that: Vector) = v.assign(that, Functions.DIV) - - /** Elementwise *= */ - private[math] def *@=(that: Double) = v.assign(Functions.MULT, that) - - /** Elementwise /= */ - private[math] def /@=(that: Double) = v.assign(Functions.DIV, that) - - /** Elementwise right-associative /= */ - private[math] def /@=:(that: Double) = v.assign(Functions.INV).assign(Functions.MULT, that) - - /** Elementwise right-associative /= */ - private[math] def /@=:(that: Vector) = v.assign(Functions.INV).assign(that, Functions.MULT) - - /** Elementwise * */ - private[math] def *@(that: Vector) = cloned *= that - - /** Elementwise * */ - private[math] def *@(that: Double) = cloned *= that - - /** Elementwise / */ - private[math] def /@(that: Vector) = cloned /= that - - /** Elementwise / */ - private[math] def /@(that: Double) = cloned /= that - - /** Elementwise right-associative / */ - private[math] def /@:(that: Double) = that /=: v.cloned - - /** Elementwise right-associative / */ - private[math] def /@:(that: Vector) = that.cloned /= v - - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala deleted file mode 100644 index f3be285..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.flavor.TraversingStructureEnum -import org.apache.mahout.math.function.{DoubleFunction, Functions, VectorFunction} -import org.apache.mahout.math.{Matrices, Matrix, QRDecomposition, Vector} - -import scala.collection.JavaConversions._ -import scala.collection._ -import scala.math._ - -class MatrixOps(val m: Matrix) { - - import MatrixOps._ - - // We need this for some functions below (but it would screw some functions above) - import RLikeOps.v2vOps - - def nrow = m.rowSize() - - def ncol = m.columnSize() - - /** - * Warning: this creates a clone (as in mx * -1), in many applications inplace inversion `mx *= -1` - * might be an infinitely better choice. 
- */ - def unary_- = cloned.assign(Functions.NEGATE) - - def +=(that: Matrix) = m.assign(that, Functions.PLUS) - - def +=:(that:Matrix) = m += that - - def +=:(that:Double) = m += that - - def -=(that: Matrix) = m.assign(that, Functions.MINUS) - - def +=(that: Double) = m.assign(new DoubleFunction { - def apply(x: Double): Double = x + that - }) - - def -=(that: Double) = +=(-that) - - def -=:(that: Double) = m.assign(Functions.minus(that)) - - /** A := B - A which is -(A - B) */ - def -=:(that: Matrix) = m.assign(that, Functions.chain(Functions.NEGATE, Functions.MINUS)) - - def +(that: Matrix) = cloned += that - - def -(that: Matrix) = cloned -= that - - def -:(that: Matrix) = that - m - - // m.plus(that)? - - def +(that: Double) = cloned += that - - def +:(that:Double) = cloned += that - - def -(that: Double) = cloned -= that - - def -:(that: Double) = that -=: cloned - - def norm = math.sqrt(m.aggregate(Functions.PLUS, Functions.SQUARE)) - - def pnorm(p: Int) = pow(m.aggregate(Functions.PLUS, Functions.chain(Functions.ABS, Functions.pow(p))), 1.0 / p) - - def apply(row: Int, col: Int) = m.get(row, col) - - def update(row: Int, col: Int, that: Double): Matrix = { - m.setQuick(row, col, that) - m - } - - def update(rowRange: Range, colRange: Range, that: Double) = apply(rowRange, colRange) := that - - def update(row: Int, colRange: Range, that: Double) = apply(row, colRange) := that - - def update(rowRange: Range, col: Int, that: Double) = apply(rowRange, col) := that - - def update(rowRange: Range, colRange: Range, that: Matrix) = apply(rowRange, colRange) := that - - def update(row: Int, colRange: Range, that: Vector) = apply(row, colRange) := that - - def update(rowRange: Range, col: Int, that: Vector) = apply(rowRange, col) := that - - - def apply(rowRange: Range, colRange: Range): Matrix = { - - if (rowRange == :: && - colRange == ::) return m - - val rr = if (rowRange == ::) 0 until m.nrow - else rowRange - val cr = if (colRange == ::) 0 until m.ncol - else 
colRange - - m.viewPart(rr.start, rr.length, cr.start, cr.length) - - } - - def apply(row: Int, colRange: Range): Vector = { - var r = m.viewRow(row) - if (colRange != ::) r = r.viewPart(colRange.start, colRange.length) - r - } - - def apply(rowRange: Range, col: Int): Vector = { - var c = m.viewColumn(col) - if (rowRange != ::) c = c.viewPart(rowRange.start, rowRange.length) - c - } - - /** - * Apply a function element-wise without side-effects to the argument (creates a new matrix). - * - * @param f element-wise function "value" ⇒ "new value" - * @param evalZeros Do we have to process zero elements? true, false, auto: if auto, we will test - * the supplied function for `f(0) != 0`, and depending on the result, will - * decide if we want evaluation for zero elements. WARNING: the AUTO setting - * may not always work correctly for functions that are meant to run in a specific - * backend context, or non-deterministic functions, such as {-1,0,1} random - * generators. - * @return new DRM with the element-wise function applied. - */ - def apply(f: Double ⇒ Double, evalZeros: AutoBooleanEnum.T): Matrix = { - val ezeros = evalZeros match { - case AutoBooleanEnum.TRUE ⇒ true - case AutoBooleanEnum.FALSE ⇒ false - case AutoBooleanEnum.AUTO ⇒ f(0) != 0 - } - if (ezeros) m.cloned := f else m.cloned ::= f - } - - /** - * Apply a function element-wise without side-effects to the argument (creates a new matrix). - * - * @param f element-wise function (row, column, value) ⇒ "new value" - * @param evalZeros Do we have to process zero elements? true, false, auto: if auto, we will test - * the supplied function for `f(0) != 0`, and depending on the result, will - * decide if we want evaluation for zero elements. WARNING: the AUTO setting - * may not always work correctly for functions that are meant to run in a specific - * backend context, or non-deterministic functions, such as {-1,0,1} random - * generators. - * @return new DRM with the element-wise function applied. 
- */ - def apply(f: (Int, Int, Double) ⇒ Double, evalZeros: AutoBooleanEnum.T): Matrix = { - val ezeros = evalZeros match { - case AutoBooleanEnum.TRUE ⇒ true - case AutoBooleanEnum.FALSE ⇒ false - case AutoBooleanEnum.AUTO ⇒ f(0,0,0) != 0 - } - if (ezeros) m.cloned := f else m.cloned ::= f - } - - /** A version of function apply with default AUTO treatment of `evalZeros`. */ - def apply(f: Double ⇒ Double): Matrix = apply(f, AutoBooleanEnum.AUTO) - - /** A version of function apply with default AUTO treatment of `evalZeros`. */ - def apply(f: (Int, Int, Double) ⇒ Double): Matrix = apply(f, AutoBooleanEnum.AUTO) - - - /** - * Warning: This provides read-only view only. - * In most cases that's what one wants. To get a copy, - * use m.t cloned - * - * @return transposed view - */ - def t = Matrices.transposedView(m) - - def det = m.determinant() - - def sum = m.zSum() - - def :=(that: Matrix) = m.assign(that) - - /** - * Assigning from a row-wise collection of vectors - * - * @param that - - */ - def :=(that: TraversableOnce[Vector]) = { - var row = 0 - that.foreach(v => { - m.assignRow(row, v) - row += 1 - }) - } - - def :=(that: Double) = m.assign(that) - - def :=(f: (Int, Int, Double) => Double): Matrix = { - import RLikeOps._ - m.getFlavor.getStructure match { - case TraversingStructureEnum.COLWISE | TraversingStructureEnum.SPARSECOLWISE => - for (col <- t; el <- col.all) el := f(el.index, col.index, el) - case default => - for (row <- m; el <- row.all) el := f(row.index, el.index, el) - } - m - } - - /** Functional assign with (Double) => Double */ - def :=(f: (Double) => Double): Matrix = { - import RLikeOps._ - m.getFlavor.getStructure match { - case TraversingStructureEnum.COLWISE | TraversingStructureEnum.SPARSECOLWISE => - for (col <- t; el <- col.all) el := f(el) - case default => - for (row <- m; el <- row.all) el := f(el) - } - m - } - - /** Sparse assign: iterate and assign over non-zeros only */ - def ::=(f: (Int, Int, Double) => Double): Matrix = { - 
- import RLikeOps._ - - m.getFlavor.getStructure match { - case TraversingStructureEnum.COLWISE | TraversingStructureEnum.SPARSECOLWISE => - for (col <- t; el <- col.nonZeroes) el := f(el.index, col.index, el) - case default => - for (row <- m; el <- row.nonZeroes) el := f(row.index, el.index, el) - } - m - } - - /** Sparse function assign: iterate and assign over non-zeros only */ - def ::=(f: (Double) => Double): Matrix = { - - import RLikeOps._ - - m.getFlavor.getStructure match { - case TraversingStructureEnum.COLWISE | TraversingStructureEnum.SPARSECOLWISE => - for (col <- t; el <- col.nonZeroes) el := f(el) - case default => - for (row <- m; el <- row.nonZeroes) el := f(el) - } - m - } - - def cloned: Matrix = m.like := m - - /** - * Ideally, we would probably want to override equals(). But that is not - * possible without modifying AbstractMatrix implementation in Mahout - * which would require discussion at Mahout team. - * - * @param that - * @return - */ - def equiv(that: Matrix) = - - // Warning: TODO: This would actually create empty objects in SparseMatrix. Should really implement - // merge-type comparison strategy using iterateNonEmpty. - that != null && - nrow == that.nrow && - m.view.zip(that).forall(t => { - t._1.equiv(t._2) - }) - - def nequiv(that: Matrix) = !equiv(that) - - def ===(that: Matrix) = equiv(that) - - def !==(that: Matrix) = nequiv(that) - - /** - * test if rank == min(nrow,ncol). 
- * - * @return - */ - def isFullRank: Boolean = - new QRDecomposition(if (nrow < ncol) m t else m cloned).hasFullRank - - def colSums() = m.aggregateColumns(vectorSumFunc) - - def rowSums() = m.aggregateRows(vectorSumFunc) - - def colMeans() = if (m.nrow == 0) colSums() else colSums() /= m.nrow - - def rowMeans() = if (m.ncol == 0) rowSums() else rowSums() /= m.ncol - - /* Diagonal */ - def diagv: Vector = m.viewDiagonal() - - /* Diagonal assignment */ - def diagv_=(that: Vector) = diagv := that - - /* Diagonal assignment */ - def diagv_=(that: Double) = diagv := that - - /* Row and Column non-zero element counts */ - def numNonZeroElementsPerColumn() = m.aggregateColumns(vectorCountNonZeroElementsFunc) - - def numNonZeroElementsPerRow() = m.aggregateRows(vectorCountNonZeroElementsFunc) -} - -object MatrixOps { - - import RLikeOps.v2vOps - - implicit def m2ops(m: Matrix): MatrixOps = new MatrixOps(m) - - private def vectorSumFunc = new VectorFunction { - def apply(f: Vector): Double = f.sum - } - - private def vectorCountNonZeroElementsFunc = new VectorFunction { - //def apply(f: Vector): Double = f.aggregate(Functions.PLUS, Functions.notEqual(0)) - def apply(f: Vector): Double = f.getNumNonZeroElements().toDouble - } - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeDoubleScalarOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeDoubleScalarOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeDoubleScalarOps.scala deleted file mode 100644 index a1e9377..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeDoubleScalarOps.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math._ - -class RLikeDoubleScalarOps(val x:Double) extends AnyVal{ - - import RLikeOps._ - - def +(that:Matrix) = that + x - - def +(that:Vector) = that + x - - def *(that:Matrix) = that * x - - def *(that:Vector) = that * x - - def -(that:Matrix) = x -: that - - def -(that:Vector) = x -: that - - def /(that:Matrix) = x /: that - - def /(that:Vector) = x /: that - - def cbind(that:Matrix) = { - val mx = that.like(that.nrow, that.ncol + 1) - mx(::, 1 until mx.ncol) := that - if (x != 0.0) mx(::, 0) := x - mx - } - - def rbind(that: Matrix) = { - val mx = that.like(that.nrow + 1, that.ncol) - mx(1 until mx.nrow, ::) := that - if (x != 0.0) mx(0, ::) := x - mx - } - - def c(that: Vector): Vector = { - val cv = that.like(that.length + 1) - cv(1 until cv.length) := that - cv(0) = x - cv - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala deleted file mode 
100644 index 3ba6ce0..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.function.Functions -import org.apache.mahout.math.{Matrix, Vector} - -import scala.collection.JavaConversions._ -import RLikeOps._ -import org.apache.mahout.math.backend.RootSolverFactory -import org.apache.mahout.math.scalabindings._ - - -class RLikeMatrixOps(m: Matrix) extends MatrixOps(m) { - - /** Structure-optimized mmul */ - - implicit var solverOperator: opMMulSolver = _ - - // get the solver matching the implicit variable solverOperator - def mmulSolver = RootSolverFactory.getOperator - - def %*%(that: Matrix) = mmulSolver(m, that, None) - - def :%*%(that:Matrix) = %*%(that) - - def %*%:(that: Matrix) = that :%*% m - - /** - * The "legacy" matrix-matrix multiplication. 
- * - * @param that right hand operand - * @return matrix multiplication result - * @deprecated use %*% - */ - def %***%(that: Matrix) = m.times(that) - - /** - * matrix-vector multiplication - * @param that - * @return - */ - def %*%(that: Vector) = m.times(that) - - /** - * Hadamard product - * - * @param that - * @return - */ - - def *(that: Matrix) = cloned *= that - - def *(that: Double) = cloned *= that - - def *:(that:Double) = cloned *= that - - def /(that: Matrix) = cloned /= that - - def /:(that: Matrix) = that / m - - def /(that: Double) = cloned /= that - - /** 1.0 /: A is eqivalent to R's 1.0/A */ - def /:(that: Double) = that /=: cloned - - /** - * in-place Hadamard product. We probably don't want to use assign - * to optimize for sparse operations, in case of Hadamard product - * it really can be done - * @param that - */ - def *=(that: Matrix) = { - m.assign(that, Functions.MULT) - m - } - - /** A *=: B is equivalent to B *= A. Included for completeness. */ - def *=:(that: Matrix) = m *= that - - /** Elementwise deletion */ - def /=(that: Matrix) = { - m.zip(that).foreach(t ⇒ t._1.vector() /= t._2.vector) - m - } - - def *=(that: Double) = { - m.foreach(_.vector() *= that) - m - } - - /** 5.0 *=: A is equivalent to A *= 5.0. Included for completeness. */ - def *=:(that: Double) = m *= that - - def /=(that: Double) = { - m ::= { x ⇒ x / that } - m - } - - /** 1.0 /=: A is equivalent to A = 1.0/A in R */ - def /=:(that: Double) = { - if (that != 0.0) m := { x ⇒ that / x } - m - } - - def ^=(that: Double) = { - that match { - // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x ^ 2 and x * x since - // pow(x,2) function return results different from x * x; but much of the code uses this - // interchangeably. Not having this done will create things like NaN entries on main diagonal - // of a distance matrix. 
- case 2.0 ⇒ m ::= { x ⇒ x * x } - case 0.5 ⇒ m ::= math.sqrt _ - case _ ⇒ m ::= { x ⇒ math.pow(x, that) } - } - } - - def ^(that: Double) = m.cloned ^= that - - def cbind(that: Matrix): Matrix = { - require(m.nrow == that.nrow) - if (m.ncol > 0) { - if (that.ncol > 0) { - val mx = m.like(m.nrow, m.ncol + that.ncol) - mx(::, 0 until m.ncol) := m - mx(::, m.ncol until mx.ncol) := that - mx - } else m - } else that - } - - def cbind(that: Double): Matrix = { - val mx = m.like(m.nrow, m.ncol + 1) - mx(::, 0 until m.ncol) := m - if (that != 0.0) mx(::, m.ncol) := that - mx - } - - def rbind(that: Matrix): Matrix = { - require(m.ncol == that.ncol) - if (m.nrow > 0) { - if (that.nrow > 0) { - val mx = m.like(m.nrow + that.nrow, m.ncol) - mx(0 until m.nrow, ::) := m - mx(m.nrow until mx.nrow, ::) := that - mx - } else m - } else that - } - - def rbind(that: Double): Matrix = { - val mx = m.like(m.nrow + 1, m.ncol) - mx(0 until m.nrow, ::) := m - if (that != 0.0) mx(m.nrow, ::) := that - mx - } -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala deleted file mode 100644 index a6f9f5b..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.{Vector, Matrix} - -/** - * R-like operators. Declare import RLikeOps._ to enable. - */ -object RLikeOps { - - implicit def double2Scalar(x:Double) = new RLikeDoubleScalarOps(x) - - implicit def v2vOps(v: Vector) = new RLikeVectorOps(v) - - implicit def el2elOps(el: Vector.Element) = new ElementOps(el) - - implicit def el2Double(el:Vector.Element) = el.get() - - implicit def m2mOps(m: Matrix) = new RLikeMatrixOps(m) - - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala deleted file mode 100644 index 394795f..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.Vector -import org.apache.mahout.math.function.Functions -import RLikeOps._ - -/** - * R-like operators - * - * @param _v - */ -class RLikeVectorOps(_v: Vector) extends VectorOps(_v) { - - /** Elementwise *= */ - def *=(that: Vector) = v.assign(that, Functions.MULT) - - def *=:(that:Vector) = *=(that) - - /** Elementwise /= */ - def /=(that: Vector) = v.assign(that, Functions.DIV) - - /** Elementwise *= */ - def *=(that: Double) = v.assign(Functions.MULT, that) - - def *=:(that: Double) = *=(that) - - /** Elementwise /= */ - def /=(that: Double) = v.assign(Functions.DIV, that) - - /** Elementwise right-associative /= */ - def /=:(that: Double) = v.assign(Functions.INV).assign(Functions.MULT, that) - - /** Elementwise right-associative /= */ - def /=:(that: Vector) = v.assign(Functions.INV).assign(that, Functions.MULT) - - /** Elementwise * */ - def *(that: Vector) = cloned *= that - - /** Elementwise * */ - def *(that: Double) = cloned *= that - - /** Elementwise * */ - def *:(that: Double) = cloned *= that - - /** Elementwise / */ - def /(that: Vector) = cloned /= that - - /** Elementwise / */ - def /(that: Double) = cloned /= that - - /** Elementwise right-associative / */ - def /:(that: Double) = that /=: v.cloned - - /** Elementwise right-associative / */ - def /:(that: Vector) = that.cloned /= v - - def ^=(that: Double) = that match { - // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x ^ 2 and x * x since - // pow(x,2) function return 
results different from x * x; but much of the code uses this - // interchangeably. Not having this done will create things like NaN entries on main diagonal - // of a distance matrix. - case 2.0 ⇒ v.assign(Functions.SQUARE) - case 0.5 ⇒ v.assign(Functions.SQRT) - case _ ⇒ v.assign (Functions.POW, that) - } - - def ^=(that: Vector) = v.assign(that, Functions.POW) - - def ^(that: Double) = v.cloned ^= that - - def ^(that: Vector) = v.cloned ^= that - - def c(that: Vector) = { - if (v.length > 0) { - if (that.length > 0) { - val cv = v.like(v.length + that.length) - cv(0 until v.length) := cv - cv(v.length until cv.length) := that - cv - } else v - } else that - } - - def c(that: Double) = { - val cv = v.like(v.length + 1) - cv(0 until v.length) := v - cv(v.length) = that - cv - } - - def mean = sum / length - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala deleted file mode 100644 index 30311b8..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math._ -import scala.collection.JavaConversions._ -import org.apache.mahout.math.function.Functions - -/** - * Syntactic sugar for mahout vectors - * @param v Mahout vector - */ -class VectorOps(private[scalabindings] val v: Vector) { - - import RLikeOps._ - - def apply(i: Int) = v.get(i) - - def update(i: Int, that: Double) = v.setQuick(i, that) - - /** Warning: we only support consecutive views, step is not supported directly */ - def apply(r: Range) = if (r == ::) v else v.viewPart(r.start, r.length * r.step) - - def update(r: Range, that: Vector) = apply(r) := that - - /** R-like synonyms for java methods on vectors */ - def sum = v.zSum() - - def min = v.minValue() - - def max = v.maxValue() - - def :=(that: Vector): Vector = { - - // assign op in Mahout requires same - // cardinality between vectors . - // we want to relax it here and require - // v to have _at least_ as large cardinality - // as "that". - if (that.length == v.size()) - v.assign(that) - else if (that.length < v.size) { - v.assign(0.0) - that.nonZeroes().foreach(t => v.setQuick(t.index, t.get)) - v - } else throw new IllegalArgumentException("Assigner's cardinality less than assignee's") - } - - def :=(that: Double): Vector = v.assign(that) - - /** Functional assigment for a function with index and x */ - def :=(f: (Int, Double) => Double): Vector = { - for (i <- 0 until length) v(i) = f(i, v(i)) - v - } - - /** Functional assignment for a function with just x (e.g. 
v := math.exp _) */ - def :=(f:(Double)=>Double):Vector = { - for (i <- 0 until length) v(i) = f(v(i)) - v - } - - /** Sparse iteration functional assignment using function receiving index and x */ - def ::=(f: (Int, Double) => Double): Vector = { - for (el <- v.nonZeroes) el := f(el.index, el.get) - v - } - - /** Sparse iteration functional assignment using a function recieving just x */ - def ::=(f: (Double) => Double): Vector = { - for (el <- v.nonZeroes) el := f(el.get) - v - } - - def equiv(that: Vector) = - length == that.length && - v.all.view.zip(that.all).forall(t => t._1.get == t._2.get) - - def ===(that: Vector) = equiv(that) - - def !==(that: Vector) = nequiv(that) - - def nequiv(that: Vector) = !equiv(that) - - def unary_- = cloned.assign(Functions.NEGATE) - - def +=(that: Vector) = v.assign(that, Functions.PLUS) - - def +=:(that: Vector) = +=(that) - - def -=(that: Vector) = v.assign(that, Functions.MINUS) - - def +=(that: Double) = v.assign(Functions.PLUS, that) - - def +=:(that: Double) = +=(that) - - def -=(that: Double) = +=(-that) - - def -=:(that: Vector) = v.assign(Functions.NEGATE).assign(that, Functions.PLUS) - - def -=:(that: Double) = v.assign(Functions.NEGATE).assign(Functions.PLUS, that) - - def +(that: Vector) = cloned += that - - def -(that: Vector) = cloned -= that - - def -:(that: Vector) = that.cloned -= v - - def +(that: Double) = cloned += that - - def +:(that: Double) = cloned += that - - def -(that: Double) = cloned -= that - - def -:(that: Double) = that -=: v.cloned - - def length = v.size() - - def cloned: Vector = v.like := v - - def sqrt = v.cloned.assign(Functions.SQRT) - - /** Convert to a single column matrix */ - def toColMatrix: Matrix = { - import RLikeOps._ - v match { - - case vd: Vector if vd.isDense => dense(vd).t - case srsv: RandomAccessSparseVector => new SparseColumnMatrix(srsv.length, 1, Array(srsv)) - case _ => sparse(v).t - } - } - -} - -class ElementOps(private[scalabindings] val el: Vector.Element) { - 
import RLikeOps._ - - def update(v: Double): Double = { el.set(v); v } - - def :=(that: Double) = update(that) - - def *(that: Vector.Element): Double = this * that - - def *(that: Vector): Vector = el.get * that - - def +(that: Vector.Element): Double = this + that - - def +(that: Vector) :Vector = el.get + that - - def /(that: Vector.Element): Double = this / that - - def /(that:Vector):Vector = el.get / that - - def -(that: Vector.Element): Double = this - that - - def -(that: Vector) :Vector = el.get - that - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala deleted file mode 100644 index 4115091..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math - -import org.apache.mahout.math.solver.EigenDecomposition - -import collection._ -import scala.util.Random - -/** - * Mahout matrices and vectors' scala syntactic sugar - */ -package object scalabindings { - - - // Reserved "ALL" range - final val `::`: Range = null - - // values for stochastic sparsityAnalysis - final val z95 = 1.959964 - final val z80 = 1.281552 - final val maxSamples = 500 - final val minSamples = 15 - - // Some enums - object AutoBooleanEnum extends Enumeration { - type T = Value - val TRUE, FALSE, AUTO = Value - } - - implicit def seq2Vector(s: TraversableOnce[AnyVal]) = - new DenseVector(s.map(_.asInstanceOf[Number].doubleValue()).toArray) - - implicit def tuple2TravOnce2svec[V <: AnyVal](sdata: TraversableOnce[(Int, V)]) = svec(sdata) - - implicit def t1vec(s: Tuple1[AnyVal]): Vector = prod2Vec(s) - - implicit def t2vec(s: Tuple2[AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t3vec(s: Tuple3[AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t4vec(s: Tuple4[AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t5vec(s: Tuple5[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t6vec(s: Tuple6[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t7vec(s: Tuple7[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t8vec(s: Tuple8[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s) - - implicit def t9vec(s: Tuple9[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = - prod2Vec(s) - - implicit def t10vec(s: Tuple10[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t11vec(s: Tuple11[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal]) - : Vector = prod2Vec(s) - - implicit def t12vec(s: 
Tuple12[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t13vec(s: Tuple13[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t14vec(s: Tuple14[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t15vec(s: Tuple15[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t16vec(s: Tuple16[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t17vec(s: Tuple17[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t18vec(s: Tuple18[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t19vec(s: Tuple19[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t20vec(s: Tuple20[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t21vec(s: Tuple21[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - implicit def t22vec(s: Tuple22[AnyVal, AnyVal, AnyVal, AnyVal, 
AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal - , AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]) - : Vector = prod2Vec(s) - - - def prod2Vec(s: Product) = new DenseVector(s.productIterator. - map(_.asInstanceOf[Number].doubleValue()).toArray) - - def diagv(v: Vector): DiagonalMatrix = new DiagonalMatrix(v) - - def diag(v: Double, size: Int): DiagonalMatrix = - new DiagonalMatrix(new DenseVector(Array.fill(size)(v))) - - def eye(size: Int) = new DiagonalMatrix(1.0, size) - - /** - * Create dense matrix out of inline arguments -- rows -- which can be tuples, - * iterables of Double, or just single Number (for columnar vectors) - * @param rows - * @tparam R - * @return - */ - def dense[R](rows: R*): DenseMatrix = { - import RLikeOps._ - val data = for (r ← rows) yield { - r match { - case n: Number ⇒ Array(n.doubleValue()) - case t: Vector ⇒ Array.tabulate(t.length)(t(_)) - case t: Array[Double] ⇒ t - case t: Iterable[_] ⇒ - t.head match { - case ss: Double ⇒ t.asInstanceOf[Iterable[Double]].toArray - case vv: Vector ⇒ - val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length) - t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach { - case (v, idx) ⇒ m(idx, ::) := v - } - return m - } - case t: Product ⇒ t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray - case t: Array[Array[Double]] ⇒ if (rows.size == 1) - return new DenseMatrix(t) - else - throw new IllegalArgumentException( - "double[][] data parameter can be the only argument for dense()") - case t: Array[Vector] ⇒ - val m = new DenseMatrix(t.size, t.head.length) - t.view.zipWithIndex.foreach { - case (v, idx) ⇒ m(idx, ::) := v - } - return m - case _ ⇒ throw new IllegalArgumentException("unsupported type in the inline Matrix initializer") - } - } - new DenseMatrix(data.toArray) - } - - /** - * Default initializes are always row-wise. - * create a sparse, - * e.g. 
{{{ - * - * m = sparse( - * (0,5)::(9,3)::Nil, - * (2,3.5)::(7,8)::Nil - * ) - * - * }}} - * - * @param rows - * @return - */ - - def sparse(rows: Vector*): SparseRowMatrix = { - import RLikeOps._ - val nrow = rows.size - val ncol = rows.map(_.size()).max - val m = new SparseRowMatrix(nrow, ncol) - m := rows.map { row ⇒ - if (row.length < ncol) { - val newRow = row.like(ncol) - newRow(0 until row.length) := row - newRow - } - else row - } - m - - } - - /** - * create a sparse vector out of list of tuple2's - * @param sdata cardinality - * @return - */ - def svec(sdata: TraversableOnce[(Int, AnyVal)], cardinality: Int = -1) = { - val required = if (sdata.nonEmpty) sdata.map(_._1).max + 1 else 0 - var tmp = -1 - if (cardinality < 0) { - tmp = required - } else if (cardinality < required) { - throw new IllegalArgumentException(s"Required cardinality %required but got %cardinality") - } else { - tmp = cardinality - } - val initialCapacity = sdata.size - val sv = new RandomAccessSparseVector(tmp, initialCapacity) - sdata.foreach(t ⇒ sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue())) - sv - } - - def dvec(fromV: Vector) = new DenseVector(fromV) - - def dvec(ddata: TraversableOnce[Double]) = new DenseVector(ddata.toArray) - - def dvec(numbers: Number*) = new DenseVector(numbers.map(_.doubleValue()).toArray) - - def chol(m: Matrix, pivoting: Boolean = false) = new CholeskyDecomposition(m, pivoting) - - /** - * computes SVD - * @param m svd input - * @return (U,V, singular-values-vector) - */ - def svd(m: Matrix) = { - val svdObj = new SingularValueDecomposition(m) - (svdObj.getU, svdObj.getV, new DenseVector(svdObj.getSingularValues)) - } - - /** - * Computes Eigendecomposition of a symmetric matrix - * @param m symmetric input matrix - * @return (V, eigen-values-vector) - */ - def eigen(m: Matrix) = { - val ed = new EigenDecomposition(m, true) - (ed.getV, ed.getRealEigenvalues) - } - - - /** - * More general version of eigen decomposition - * @param m - * @param 
symmetric - * @return (V, eigenvalues-real-vector, eigenvalues-imaginary-vector) - */ - def eigenFull(m: Matrix, symmetric: Boolean = true) { - val ed = new EigenDecomposition(m, symmetric) - (ed.getV, ed.getRealEigenvalues, ed.getImagEigenvalues) - } - - /** - * QR. - * - * Right now Mahout's QR seems to be using argument for in-place transformations, - * so the matrix context gets messed after this. Hence we force cloning of the - * argument before passing it to Mahout's QR so to keep expected semantics. - * @param m - * @return (Q,R) - */ - def qr(m: Matrix) = { - import MatrixOps._ - val qrdec = new QRDecomposition(m cloned) - (qrdec.getQ, qrdec.getR) - } - - /** - * Solution X of A*X = B using QR-Decomposition, where A is a square, non-singular matrix. - * - * @param a - * @param b - * @return (X) - */ - def solve(a: Matrix, b: Matrix): Matrix = { - import MatrixOps._ - if (a.nrow != a.ncol) { - throw new IllegalArgumentException("supplied matrix A is not square") - } - val qr = new QRDecomposition(a cloned) - if (!qr.hasFullRank) { - throw new IllegalArgumentException("supplied matrix A is singular") - } - qr.solve(b) - } - - /** - * Solution A^{-1} of A*A^{-1} = I using QR-Decomposition, where A is a square, - * non-singular matrix. Here only for compatibility with R semantics. - * - * @param a - * @return (A^{-1}) - */ - def solve(a: Matrix): Matrix = { - import MatrixOps._ - solve(a, eye(a.nrow)) - } - - /** - * Solution x of A*x = b using QR-Decomposition, where A is a square, non-singular matrix. - * - * @param a - * @param b - * @return (x) - */ - def solve(a: Matrix, b: Vector): Vector = { - import RLikeOps._ - val x = solve(a, b.toColMatrix) - x(::, 0) - } - - /////////////////////////////////////////////////////////// - // Elementwise unary functions. Actually this requires creating clones to avoid side effects. For - // efficiency reasons one may want to actually do in-place exression assignments instead, e.g. 
- // - // m := exp _ - - import RLikeOps._ - import scala.math._ - - def mexp(m: Matrix): Matrix = m.cloned := exp _ - - def vexp(v: Vector): Vector = v.cloned := exp _ - - def mlog(m: Matrix): Matrix = m.cloned := log _ - - def vlog(v: Vector): Vector = v.cloned := log _ - - def mabs(m: Matrix): Matrix = m.cloned ::= (abs(_: Double)) - - def vabs(v: Vector): Vector = v.cloned ::= (abs(_: Double)) - - def msqrt(m: Matrix): Matrix = m.cloned ::= sqrt _ - - def vsqrt(v: Vector): Vector = v.cloned ::= sqrt _ - - def msignum(m: Matrix): Matrix = m.cloned ::= (signum(_: Double)) - - def vsignum(v: Vector): Vector = v.cloned ::= (signum(_: Double)) - - ////////////////////////////////////////////////////////// - // operation funcs - - - /** Matrix-matrix unary func */ - type MMUnaryFunc = (Matrix, Option[Matrix]) ⇒ Matrix - /** Binary matrix-matrix operations which may save result in-place, optionally */ - type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) ⇒ Matrix - type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) ⇒ Matrix - type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) ⇒ Matrix - type MDBinaryFunc = (Matrix, Double, Option[Matrix]) ⇒ Matrix - - trait opMMulSolver extends MMBinaryFunc { - - } - - ///////////////////////////////////// - // Miscellaneous in-core utilities - - /** - * Compute column-wise means and variances. - * - * @return colMeans → colVariances - */ - def colMeanVars(mxA:Matrix): (Vector, Vector) = { - val mu = mxA.colMeans() - val variance = (mxA * mxA colMeans) -= mu ^ 2 - mu → variance - } - - /** - * Compute column-wise means and stdevs. - * @param mxA input - * @return colMeans → colStdevs - */ - def colMeanStdevs(mxA:Matrix) = { - val (mu, variance) = colMeanVars(mxA) - mu → (variance ::= math.sqrt _) - } - - /** Compute square distance matrix. We assume data points are row-wise, similar to R's dist(). 
*/ - def sqDist(mxX: Matrix): Matrix = { - - val s = mxX ^ 2 rowSums - - (mxX %*% mxX.t) := { (r, c, x) ⇒ s(r) + s(c) - 2 * x} - } - - /** - * Pairwise squared distance computation. - * @param mxX X, m x d - * @param mxY Y, n x d - * @return pairwise squaired distances of row-wise data points in X and Y (m x n) - */ - def sqDist(mxX: Matrix, mxY: Matrix): Matrix = { - - val s = mxX ^ 2 rowSums - - val t = mxY ^ 2 rowSums - - // D = s*1' + 1*t' - 2XY' - (mxX %*% mxY.t) := { (r, c, d) ⇒ s(r) + t(c) - 2.0 * d} - } - - def dist(mxX: Matrix): Matrix = sqDist(mxX) := sqrt _ - - def dist(mxX: Matrix, mxY: Matrix): Matrix = sqDist(mxX, mxY) := sqrt _ - - /** - * Check the density of an in-core matrix based on supplied criteria. - * Returns true if we think mx is denser than threshold with at least 80% confidence. - * - * @param mx The matrix to check density of. - * @param threshold the threshold of non-zero elements above which we consider a Matrix Dense - */ - def densityAnalysis(mx: Matrix, threshold: Double = 0.25): Boolean = { - - require(threshold >= 0.0 && threshold <= 1.0) - var n = minSamples - var mean = 0.0 - val rnd = new Random() - val dimm = mx.nrow - val dimn = mx.ncol - val pq = threshold * (1 - threshold) - - for (s ← 0 until minSamples) { - if (mx(rnd.nextInt(dimm), rnd.nextInt(dimn)) != 0.0) mean += 1 - } - mean /= minSamples - val iv = z80 * math.sqrt(pq / n) - - if (mean < threshold - iv) return false // sparse - else if (mean > threshold + iv) return true // dense - - while (n < maxSamples) { - // Determine upper bound we may need for n to likely relinquish the uncertainty. Here, we use - // confidence interval formula but solved for n. 
- val ivNeeded = math.abs(threshold - mean) max 1e-11 - - val stderr = ivNeeded / z80 - val nNeeded = (math.ceil(pq / (stderr * stderr)).toInt max n min maxSamples) - n - - var meanNext = 0.0 - for (s ← 0 until nNeeded) { - if (mx(rnd.nextInt(dimm), rnd.nextInt(dimn)) != 0.0) meanNext += 1 - } - mean = (n * mean + meanNext) / (n + nNeeded) - n += nNeeded - - // Are we good now? - val iv = z80 * math.sqrt(pq / n) - if (mean < threshold - iv) return false // sparse - else if (mean > threshold + iv) return true // dense - } - - mean > threshold // if (mean > threshold) dense - - } - - - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/nlp/tfidf/TFIDF.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/nlp/tfidf/TFIDF.scala b/math-scala/src/main/scala/org/apache/mahout/nlp/tfidf/TFIDF.scala deleted file mode 100644 index c75ff20..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/nlp/tfidf/TFIDF.scala +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.nlp.tfidf - -trait TermWeight { - - /** - * @param tf term freq - * @param df doc freq - * @param length Length of the document - * @param numDocs the total number of docs - */ - def calculate(tf: Int, df: Int, length: Int, numDocs: Int): Double -} - - -class TFIDF extends TermWeight { - - /** - * Calculate TF-IDF weight. - * - * Lucene 4.6's DefaultSimilarity TF-IDF calculation uses the formula: - * - * sqrt(termFreq) * (log(numDocs / (docFreq + 1)) + 1.0) - * - * Note: this is consistent with the MapReduce seq2sparse implementation of TF-IDF weights - * and is slightly different from Spark MLlib's TD-IDF calculation which is implemented as: - * - * termFreq * log((numDocs + 1.0) / (docFreq + 1.0)) - * - * @param tf term freq - * @param df doc freq - * @param length Length of the document - UNUSED - * @param numDocs the total number of docs - * @return The TF-IDF weight as calculated by Lucene 4.6's DefaultSimilarity - */ - def calculate(tf: Int, df: Int, length: Int, numDocs: Int): Double = { - - // Lucene 4.6 DefaultSimilarity's TF-IDF is implemented as: - // sqrt(tf) * (log(numDocs / (df + 1)) + 1) - math.sqrt(tf) * (math.log(numDocs / (df + 1).toDouble) + 1.0) - } -} - -class MLlibTFIDF extends TermWeight { - - /** - * Calculate TF-IDF weight with IDF formula used by Spark MLlib's IDF: - * - * termFreq * log((numDocs + 1.0) / (docFreq + 1.0)) - * - * Use this weight if working with MLLib vectorized documents. 
- * - * Note: this is not consistent with the MapReduce seq2sparse implementation of TF-IDF weights - * which is implemented using Lucene DefaultSimilarity's TF-IDF calculation: - * - * sqrt(termFreq) * (log(numDocs / (docFreq + 1)) + 1.0) - * - * @param tf term freq - * @param df doc freq - * @param length Length of the document - UNUSED - * @param numDocs the total number of docs - * @return The TF-IDF weight as calculated by Spark MLlib's IDF - */ - def calculate(tf: Int, df: Int, length: Int, numDocs: Int): Double = { - - // Spark MLLib's TF-IDF weight is implemented as: - // termFreq * log((numDocs + 1.0) / (docFreq + 1.0)) - tf * math.log((numDocs + 1.0) / (df + 1).toDouble) - } -} - -class TF extends TermWeight { - - /** - * For TF Weight simply return the absolute TF. - * - * Note: We do not use Lucene 4.6's DefaultSimilarity's TF calculation here - * which returns: - * - * sqrt(tf) - * - * this is consistent with the MapReduce seq2sparse implementation of TF weights. - * - * @param tf term freq - * @param df doc freq - UNUSED - * @param length Length of the document - UNUSED - * @param numDocs the total number of docs - UNUSED - * based on term frequency only - UNUSED - * @return The weight = tf param - */ - def calculate(tf: Int, df: Int = -Int.MaxValue, length: Int = -Int.MaxValue, numDocs: Int = -Int.MaxValue): Double = { - tf - } -} - - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/util/IOUtilsScala.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/util/IOUtilsScala.scala b/math-scala/src/main/scala/org/apache/mahout/util/IOUtilsScala.scala deleted file mode 100644 index b61bea4..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/util/IOUtilsScala.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.util - -import org.apache.mahout.logging._ -import collection._ -import java.io.Closeable - -object IOUtilsScala { - - private final implicit val log = getLog(IOUtilsScala.getClass) - - /** - * Try to close every resource in the sequence, in order of the sequence. - * - * Report all encountered exceptions to logging. - * - * Rethrow last exception only (if any) - * @param closeables - */ - def close(closeables: Seq[Closeable]) = { - - var lastThr: Option[Throwable] = None - closeables.foreach { c => - try { - c.close() - } catch { - case t: Throwable => - error(t.getMessage, t) - lastThr = Some(t) - } - } - - // Rethrow most recent close exception (can throw only one) - lastThr.foreach(throw _) - } - - /** - * Same as [[IOUtilsScala.close( )]] but do not re-throw any exceptions. 
- * @param closeables - */ - def closeQuietly(closeables: Seq[Closeable]) = { - try { - close(closeables) - } catch { - case t: Throwable => // NOP - } - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/test/scala/org/apache/mahout/classifier/naivebayes/NBTestBase.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/test/scala/org/apache/mahout/classifier/naivebayes/NBTestBase.scala b/math-scala/src/test/scala/org/apache/mahout/classifier/naivebayes/NBTestBase.scala deleted file mode 100644 index c8f8a90..0000000 --- a/math-scala/src/test/scala/org/apache/mahout/classifier/naivebayes/NBTestBase.scala +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.math._ -import org.apache.mahout.math.scalabindings._ -import org.apache.mahout.test.DistributedMahoutSuite -import org.apache.mahout.test.MahoutSuite -import org.scalatest.{FunSuite, Matchers} -import collection._ -import JavaConversions._ -import collection.JavaConversions - -trait NBTestBase extends DistributedMahoutSuite with Matchers { this:FunSuite => - - val epsilon = 1E-6 - - test("Simple Standard NB Model") { - - // test from simulated sparse TF-IDF data - val inCoreTFIDF = sparse( - (0, 0.7) ::(1, 0.1) ::(2, 0.1) ::(3, 0.3) :: Nil, - (0, 0.4) ::(1, 0.4) ::(2, 0.1) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.0) ::(2, 0.8) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.1) ::(2, 0.1) ::(3, 0.7) :: Nil - ) - - val TFIDFDrm = drm.drmParallelize(m = inCoreTFIDF, numPartitions = 2) - - val labelIndex = new java.util.HashMap[String,Integer]() - labelIndex.put("Cat1", 3) - labelIndex.put("Cat2", 2) - labelIndex.put("Cat3", 1) - labelIndex.put("Cat4", 0) - - // train a Standard NB Model - val model = NaiveBayes.train(TFIDFDrm, labelIndex, false) - - // validate the model- will throw an exception if model is invalid - model.validate() - - // check the labelWeights - model.labelWeight(0) - 1.2 should be < epsilon - model.labelWeight(1) - 1.0 should be < epsilon - model.labelWeight(2) - 1.0 should be < epsilon - model.labelWeight(3) - 1.0 should be < epsilon - - // check the Feature weights - model.featureWeight(0) - 1.3 should be < epsilon - model.featureWeight(1) - 0.6 should be < epsilon - model.featureWeight(2) - 1.1 should be < epsilon - model.featureWeight(3) - 1.2 should be < epsilon - } - - test("NB Aggregator") { - - val rowBindings = new java.util.HashMap[String,Integer]() - rowBindings.put("/Cat1/doc_a/", 0) - rowBindings.put("/Cat2/doc_b/", 1) - rowBindings.put("/Cat1/doc_c/", 2) - rowBindings.put("/Cat2/doc_d/", 3) - rowBindings.put("/Cat1/doc_e/", 4) - - - val matrixSetup = sparse( - 
(0, 0.1) ::(1, 0.0) ::(2, 0.1) ::(3, 0.0) :: Nil, - (0, 0.0) ::(1, 0.1) ::(2, 0.0) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.0) ::(2, 0.1) ::(3, 0.0) :: Nil, - (0, 0.0) ::(1, 0.1) ::(2, 0.0) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.0) ::(2, 0.1) ::(3, 0.0) :: Nil - ) - - - matrixSetup.setRowLabelBindings(rowBindings) - - val TFIDFDrm = drm.drmParallelizeWithRowLabels(m = matrixSetup, numPartitions = 2) - - val (labelIndex, aggregatedTFIDFDrm) = NaiveBayes.extractLabelsAndAggregateObservations(TFIDFDrm) - - labelIndex.size should be (2) - - val cat1=labelIndex("Cat1") - val cat2=labelIndex("Cat2") - - cat1 should be (0) - cat2 should be (1) - - val aggregatedTFIDFInCore = aggregatedTFIDFDrm.collect - aggregatedTFIDFInCore.numCols should be (4) - aggregatedTFIDFInCore.numRows should be (2) - - aggregatedTFIDFInCore.get(cat1, 0) - 0.3 should be < epsilon - aggregatedTFIDFInCore.get(cat1, 1) - 0.0 should be < epsilon - aggregatedTFIDFInCore.get(cat1, 2) - 0.3 should be < epsilon - aggregatedTFIDFInCore.get(cat1, 3) - 0.0 should be < epsilon - aggregatedTFIDFInCore.get(cat2, 0) - 0.0 should be < epsilon - aggregatedTFIDFInCore.get(cat2, 1) - 0.2 should be < epsilon - aggregatedTFIDFInCore.get(cat2, 2) - 0.0 should be < epsilon - aggregatedTFIDFInCore.get(cat2, 3) - 0.2 should be < epsilon - - } - - test("Model DFS Serialization") { - - // test from simulated sparse TF-IDF data - val inCoreTFIDF = sparse( - (0, 0.7) ::(1, 0.1) ::(2, 0.1) ::(3, 0.3) :: Nil, - (0, 0.4) ::(1, 0.4) ::(2, 0.1) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.0) ::(2, 0.8) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.1) ::(2, 0.1) ::(3, 0.7) :: Nil - ) - - val labelIndex = new java.util.HashMap[String,Integer]() - labelIndex.put("Cat1", 0) - labelIndex.put("Cat2", 1) - labelIndex.put("Cat3", 2) - labelIndex.put("Cat4", 3) - - val TFIDFDrm = drm.drmParallelize(m = inCoreTFIDF, numPartitions = 2) - - // train a Standard NB Model- no label index here - val model = NaiveBayes.train(TFIDFDrm, labelIndex, false) - - // 
validate the model- will throw an exception if model is invalid - model.validate() - - // save the model - model.dfsWrite(TmpDir) - - // reload a new model which should be equal to the original - // this will automatically trigger a validate() call - val materializedModel= NBModel.dfsRead(TmpDir) - - - // check the labelWeights - model.labelWeight(0) - materializedModel.labelWeight(0) should be < epsilon //1.2 - model.labelWeight(1) - materializedModel.labelWeight(1) should be < epsilon //1.0 - model.labelWeight(2) - materializedModel.labelWeight(2) should be < epsilon //1.0 - model.labelWeight(3) - materializedModel.labelWeight(3) should be < epsilon //1.0 - - // check the Feature weights - model.featureWeight(0) - materializedModel.featureWeight(0) should be < epsilon //1.3 - model.featureWeight(1) - materializedModel.featureWeight(1) should be < epsilon //0.6 - model.featureWeight(2) - materializedModel.featureWeight(2) should be < epsilon //1.1 - model.featureWeight(3) - materializedModel.featureWeight(3) should be < epsilon //1.2 - - // check to se if the new model is complementary - materializedModel.isComplementary should be (model.isComplementary) - - // check the label indexMaps - for(elem <- model.labelIndex){ - model.labelIndex(elem._1) == materializedModel.labelIndex(elem._1) should be (true) - } - } - - test("train and test a model") { - - // test from simulated sparse TF-IDF data - val inCoreTFIDF = sparse( - (0, 0.7) ::(1, 0.1) ::(2, 0.1) ::(3, 0.3) :: Nil, - (0, 0.4) ::(1, 0.4) ::(2, 0.1) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.0) ::(2, 0.8) ::(3, 0.1) :: Nil, - (0, 0.1) ::(1, 0.1) ::(2, 0.1) ::(3, 0.7) :: Nil - ) - - val labelIndex = new java.util.HashMap[String,Integer]() - labelIndex.put("/Cat1/", 0) - labelIndex.put("/Cat2/", 1) - labelIndex.put("/Cat3/", 2) - labelIndex.put("/Cat4/", 3) - - val TFIDFDrm = drm.drmParallelize(m = inCoreTFIDF, numPartitions = 2) - - // train a Standard NB Model- no label index here - val model = 
NaiveBayes.train(TFIDFDrm, labelIndex, false) - - // validate the model- will throw an exception if model is invalid - model.validate() - - // save the model - model.dfsWrite(TmpDir) - - // reload a new model which should be equal to the original - // this will automatically trigger a validate() call - val materializedModel= NBModel.dfsRead(TmpDir) - - - // check to se if the new model is complementary - materializedModel.isComplementary should be (model.isComplementary) - - // check the label indexMaps - for(elem <- model.labelIndex){ - model.labelIndex(elem._1) == materializedModel.labelIndex(elem._1) should be (true) - } - - - //self test on the original set - val inCoreTFIDFWithLabels = inCoreTFIDF.clone() - inCoreTFIDFWithLabels.setRowLabelBindings(labelIndex) - val TFIDFDrmWithLabels = drm.drmParallelizeWithRowLabels(m = inCoreTFIDFWithLabels, numPartitions = 2) - - NaiveBayes.test(materializedModel,TFIDFDrmWithLabels , false) - - } - - test("train and test a model with the confusion matrix") { - - val rowBindings = new java.util.HashMap[String,Integer]() - rowBindings.put("/Cat1/doc_a/", 0) - rowBindings.put("/Cat2/doc_b/", 1) - rowBindings.put("/Cat1/doc_c/", 2) - rowBindings.put("/Cat2/doc_d/", 3) - rowBindings.put("/Cat1/doc_e/", 4) - rowBindings.put("/Cat2/doc_f/", 5) - rowBindings.put("/Cat1/doc_g/", 6) - rowBindings.put("/Cat2/doc_h/", 7) - rowBindings.put("/Cat1/doc_i/", 8) - rowBindings.put("/Cat2/doc_j/", 9) - - val seed = 1 - - val matrixSetup = Matrices.uniformView(10, 50 , seed) - - println("TFIDF matrix") - println(matrixSetup) - - matrixSetup.setRowLabelBindings(rowBindings) - - val TFIDFDrm = drm.drmParallelizeWithRowLabels(matrixSetup) - - // println("Parallelized and Collected") - // println(TFIDFDrm.collect) - - val (labelIndex, aggregatedTFIDFDrm) = NaiveBayes.extractLabelsAndAggregateObservations(TFIDFDrm) - - println("Aggregated by key") - println(aggregatedTFIDFDrm.collect) - println(labelIndex) - - - // train a Standard NB Model- no 
label index here - val model = NaiveBayes.train(aggregatedTFIDFDrm, labelIndex, false) - - // validate the model- will throw an exception if model is invalid - model.validate() - - // save the model - model.dfsWrite(TmpDir) - - // reload a new model which should be equal to the original - // this will automatically trigger a validate() call - val materializedModel= NBModel.dfsRead(TmpDir) - - // check to se if the new model is complementary - materializedModel.isComplementary should be (model.isComplementary) - - // check the label indexMaps - for(elem <- model.labelIndex){ - model.labelIndex(elem._1) == materializedModel.labelIndex(elem._1) should be (true) - } - - // val testTFIDFDrm = drm.drmParallelizeWithRowLabels(m = matrixSetup, numPartitions = 2) - - // self test on this model - val result = NaiveBayes.test(materializedModel, TFIDFDrm , false) - - println(result) - - result.getConfusionMatrix.getMatrix.getQuick(0, 0) should be(5) - result.getConfusionMatrix.getMatrix.getQuick(0, 1) should be(0) - result.getConfusionMatrix.getMatrix.getQuick(1, 0) should be(0) - result.getConfusionMatrix.getMatrix.getQuick(1, 1) should be(5) - - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsTestBase.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsTestBase.scala b/math-scala/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsTestBase.scala deleted file mode 100644 index eafde11..0000000 --- a/math-scala/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsTestBase.scala +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.stats
-
-import java.lang.Double
-import java.util.Random
-import java.util.Arrays
-
-import org.apache.mahout.common.RandomUtils
-import org.apache.mahout.math.Matrix
-import org.apache.mahout.test.DistributedMahoutSuite
-import org.scalatest.{FunSuite, Matchers}
-
-/**
- * Shared test cases for the classifier statistics helpers: the running
- * averages (plain, with standard deviation, inverted) and the confusion matrix.
- *
- * The self-type requires the trait to be mixed into a `FunSuite`; every
- * `test(...)` call below registers one case on that suite.
- *
- * NOTE: `Double` in this file is `java.lang.Double` (see the imports), which is
- * why the primitive type is spelled `scala.Double` where a type is needed.
- */
-trait ClassifierStatsTestBase extends DistributedMahoutSuite with Matchers { this: FunSuite =>
-
-  /** Tolerance for values that should match up to floating-point rounding. */
-  val epsilon = 1E-6
-
-  /** Loose tolerance; only used by the 100000-sample random test below. */
-  val smallEpsilon = 1.0
-
-  /**
-   * Asserts that `actual` is strictly within `tolerance` of `expected`.
-   *
-   * Always compares the absolute difference, so a value that is too large
-   * fails just like one that is too small. A NaN `actual` fails as well,
-   * because every comparison with NaN is false.
-   *
-   * @param expected  the value the computation should produce
-   * @param actual    the value it did produce
-   * @param tolerance maximum allowed absolute difference (exclusive)
-   */
-  private def assertClose(expected: scala.Double,
-                          actual: scala.Double,
-                          tolerance: scala.Double = epsilon): Unit =
-    assert((expected - actual).abs < tolerance, s"expected $expected +/- $tolerance but was $actual")
-
-  // FullRunningAverageAndStdDev tests
-  test("testFullRunningAverageAndStdDev") {
-    val average: RunningAverageAndStdDev = new FullRunningAverageAndStdDev
-    assert(average.getCount == 0)
-    assert(Double.isNaN(average.getAverage))
-    assert(Double.isNaN(average.getStandardDeviation))
-
-    average.addDatum(6.0)
-    assert(average.getCount == 1)
-    assertClose(6.0, average.getAverage)
-    assert(Double.isNaN(average.getStandardDeviation))
-
-    average.addDatum(6.0)
-    assert(average.getCount == 2)
-    assertClose(6.0, average.getAverage)
-    assertClose(0.0, average.getStandardDeviation)
-
-    average.removeDatum(6.0)
-    assert(average.getCount == 1)
-    assertClose(6.0, average.getAverage)
-    assert(Double.isNaN(average.getStandardDeviation))
-
-    average.addDatum(-4.0)
-    assert(average.getCount == 2)
-    assertClose(1.0, average.getAverage)
-    assertClose(5.0 * 1.4142135623730951, average.getStandardDeviation)
-
-    // Removes +4.0 although -4.0 was added; the expected average is -2.0.
-    average.removeDatum(4.0)
-    assert(average.getCount == 1)
-    assertClose(-2.0, average.getAverage)
-    assert(Double.isNaN(average.getStandardDeviation))
-  }
-
-  test("testBigFullRunningAverageAndStdDev") {
-    val average: RunningAverageAndStdDev = new FullRunningAverageAndStdDev
-    RandomUtils.useTestSeed()
-    val r: Random = RandomUtils.getRandom
-
-    for (_ <- 0 until 100000) {
-      average.addDatum(r.nextDouble() * 1000.0)
-    }
-
-    // Uniform samples on [0, 1000): mean 500, standard deviation 1000 / sqrt(12).
-    assertClose(500.0, average.getAverage, smallEpsilon)
-    assertClose(1000.0 / Math.sqrt(12.0), average.getStandardDeviation, smallEpsilon)
-  }
-
-  test("testStddevFullRunningAverageAndStdDev") {
-    val runningAverage: RunningAverageAndStdDev = new FullRunningAverageAndStdDev
-    assert(runningAverage.getCount == 0)
-    assert(Double.isNaN(runningAverage.getAverage))
-
-    runningAverage.addDatum(1.0)
-    assert(runningAverage.getCount == 1)
-    assertClose(1.0, runningAverage.getAverage)
-    assert(Double.isNaN(runningAverage.getStandardDeviation))
-
-    runningAverage.addDatum(1.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(1.0, runningAverage.getAverage)
-    assertClose(0.0, runningAverage.getStandardDeviation)
-
-    runningAverage.addDatum(7.0)
-    assert(runningAverage.getCount == 3)
-    assertClose(3.0, runningAverage.getAverage)
-    assertClose(3.464101552963257, runningAverage.getStandardDeviation)
-
-    runningAverage.addDatum(5.0)
-    assert(runningAverage.getCount == 4)
-    // Previously a signed comparison: any average above 3.5 would have passed.
-    assertClose(3.5, runningAverage.getAverage)
-    assertClose(3.0, runningAverage.getStandardDeviation)
-  }
-
-  // FullRunningAverage tests
-  test("testFullRunningAverage"){
-    val runningAverage: RunningAverage = new FullRunningAverage
-    assert(runningAverage.getCount == 0)
-    assert(Double.isNaN(runningAverage.getAverage))
-
-    runningAverage.addDatum(1.0)
-    assert(runningAverage.getCount == 1)
-    assertClose(1.0, runningAverage.getAverage)
-
-    runningAverage.addDatum(1.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(1.0, runningAverage.getAverage)
-
-    runningAverage.addDatum(4.0)
-    assert(runningAverage.getCount == 3)
-    // Previously a signed comparison: any average above 2.0 would have passed.
-    assertClose(2.0, runningAverage.getAverage)
-
-    runningAverage.addDatum(-4.0)
-    assert(runningAverage.getCount == 4)
-    assertClose(0.5, runningAverage.getAverage)
-
-    runningAverage.removeDatum(-4.0)
-    assert(runningAverage.getCount == 3)
-    assertClose(2.0, runningAverage.getAverage)
-
-    runningAverage.removeDatum(4.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(1.0, runningAverage.getAverage)
-
-    runningAverage.changeDatum(0.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(1.0, runningAverage.getAverage)
-
-    runningAverage.changeDatum(2.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(2.0, runningAverage.getAverage)
-  }
-
-  test("testFullRunningAveragCopyConstructor") {
-    val runningAverage: RunningAverage = new FullRunningAverage
-    runningAverage.addDatum(1.0)
-    runningAverage.addDatum(1.0)
-    assert(runningAverage.getCount == 2)
-    assertClose(1.0, runningAverage.getAverage)
-
-    val copy: RunningAverage = new FullRunningAverage(runningAverage.getCount, runningAverage.getAverage)
-    assert(copy.getCount == 2)
-    assertClose(1.0, copy.getAverage)
-  }
-
-  // Inverted Running Average tests
-  test("testInvertedRunningAverage") {
-    val avg: RunningAverage = new FullRunningAverage
-    val inverted: RunningAverage = new InvertedRunningAverage(avg)
-    assert(inverted.getCount == 0)
-
-    avg.addDatum(1.0)
-    assert(inverted.getCount == 1)
-    assertClose(-1.0, inverted.getAverage)
-
-    avg.addDatum(2.0)
-    assert(inverted.getCount == 2)
-    assertClose(-1.5, inverted.getAverage)
-  }
-
-  test ("testInvertedRunningAverageAndStdDev") {
-    val avg: RunningAverageAndStdDev = new FullRunningAverageAndStdDev
-    val inverted: RunningAverageAndStdDev = new InvertedRunningAverageAndStdDev(avg)
-    assert(inverted.getCount == 0)
-
-    avg.addDatum(1.0)
-    assert(inverted.getCount == 1)
-    assertClose(-1.0, inverted.getAverage)
-
-    avg.addDatum(2.0)
-    assert(inverted.getCount == 2)
-    assertClose(-1.5, inverted.getAverage)
-    assertClose(Math.sqrt(2.0) / 2.0, inverted.getStandardDeviation)
-  }
-
-  // confusion Matrix tests
-  val VALUES: Array[Array[Int]] = Array(Array(2, 3), Array(10, 20))
-  val LABELS: Array[String] = Array("Label1", "Label2")
-  val OTHER: Array[Int] = Array(3, 6)
-  val DEFAULT_LABEL: String = "other"
-
-  /**
-   * Builds a confusion matrix over `labels` plus `defaultLabel`.
-   *
-   * `values(i)(j)` is stored under (`labels(i)`, `labels(j)`), and `OTHER(i)`
-   * under (`labels(i)`, `defaultLabel`). The arguments are now honoured; the
-   * earlier version silently used "Label1"/"Label2"/`DEFAULT_LABEL` instead.
-   *
-   * @param values       square count table with one row and column per label
-   * @param labels       the regular labels; `OTHER` must hold one entry per label
-   * @param defaultLabel label passed to the matrix as its default label
-   * @return the populated confusion matrix
-   */
-  def fillConfusionMatrix(values: Array[Array[Int]], labels: Array[String], defaultLabel: String): ConfusionMatrix = {
-    val confusionMatrix: ConfusionMatrix = new ConfusionMatrix(Arrays.asList(labels: _*), defaultLabel)
-
-    // Same insertion order as before: all label/label cells first, then the default column.
-    for (row <- labels.indices; col <- labels.indices) {
-      confusionMatrix.putCount(labels(row), labels(col), values(row)(col))
-    }
-    for (row <- labels.indices) {
-      confusionMatrix.putCount(labels(row), defaultLabel, OTHER(row))
-    }
-
-    confusionMatrix
-  }
-
-  /** Checks the label count and the per-label accuracy of the fixture matrix. */
-  private def checkAccuracy(cm: ConfusionMatrix): Unit = {
-    val labelstrs = cm.getLabels
-    assert(labelstrs.size == 3)
-    assertClose(25.0, cm.getAccuracy("Label1"))
-    assertClose(55.5555555, cm.getAccuracy("Label2"))
-    assert(Double.isNaN(cm.getAccuracy("other")))
-  }
-
-  /** Checks the raw counts and labels of the fixture matrix against VALUES/OTHER/LABELS. */
-  private def checkValues(cm: ConfusionMatrix): Unit = {
-    val counts: Array[Array[Int]] = cm.getConfusionMatrix
-    // Result unused: the call only verifies that toString does not throw.
-    cm.toString
-    assert(counts.length == counts(0).length)
-    assert(counts.length == 3)
-    assert(counts(0)(0) == VALUES(0)(0))
-    assert(counts(0)(1) == VALUES(0)(1))
-    assert(counts(1)(0) == VALUES(1)(0))
-    assert(counts(1)(1) == VALUES(1)(1))
-    // The default label's own row must be all zeros.
-    assert(Arrays.equals(new Array[Int](3), counts(2)))
-    assert(counts(0)(2) == OTHER(0))
-    assert(counts(1)(2) == OTHER(1))
-    assert(cm.getLabels.size == 3)
-    assert(cm.getLabels.contains(LABELS(0)))
-    assert(cm.getLabels.contains(LABELS(1)))
-    assert(cm.getLabels.contains(DEFAULT_LABEL))
-  }
-
-  test("testBuild"){
-    val confusionMatrix: ConfusionMatrix = fillConfusionMatrix(VALUES, LABELS, DEFAULT_LABEL)
-    checkValues(confusionMatrix)
-    checkAccuracy(confusionMatrix)
-  }
-
-  test("GetMatrix") {
-    val confusionMatrix: ConfusionMatrix = fillConfusionMatrix(VALUES, LABELS, DEFAULT_LABEL)
-    val m: Matrix = confusionMatrix.getMatrix
-    val rowLabels = m.getRowLabelBindings
-    assert(confusionMatrix.getLabels.size == m.numCols)
-    assert(rowLabels.keySet.contains(LABELS(0)))
-    assert(rowLabels.keySet.contains(LABELS(1)))
-    assert(rowLabels.keySet.contains(DEFAULT_LABEL))
-    assert(confusionMatrix.getCorrect(LABELS(0)) == 2)
-    assert(confusionMatrix.getCorrect(LABELS(1)) == 20)
-    assert(confusionMatrix.getCorrect(DEFAULT_LABEL) == 0)
-  }
-
-  /**
-   * Example taken from
-   * http://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_fscore_support.html
-   */
-  test("testPrecisionRecallAndF1ScoreAsScikitLearn") {
-    val labelList = Arrays.asList("0", "1", "2")
-    val confusionMatrix: ConfusionMatrix = new ConfusionMatrix(labelList, "DEFAULT")
-    confusionMatrix.putCount("0", "0", 2)
-    confusionMatrix.putCount("1", "0", 1)
-    confusionMatrix.putCount("1", "2", 1)
-    confusionMatrix.putCount("2", "1", 2)
-    // Inferred as a primitive double; an explicit `Double` here would be java.lang.Double.
-    val delta = 0.001
-    assertClose(0.222, confusionMatrix.getWeightedPrecision, delta)
-    assertClose(0.333, confusionMatrix.getWeightedRecall, delta)
-    assertClose(0.266, confusionMatrix.getWeightedF1score, delta)
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuiteBase.scala
deleted file mode 100644
index 70fb10f..0000000
--- a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuiteBase.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.mahout.math.algorithms
-
-import org.apache.mahout.math.algorithms.preprocessing._
-import org.apache.mahout.math.drm.drmParallelize
-import org.apache.mahout.math.scalabindings.{dense, sparse, svec}
-import org.apache.mahout.math.scalabindings.RLikeOps._
-import org.apache.mahout.test.DistributedMahoutSuite
-import org.scalatest.{FunSuite, Matchers}
-
-import org.apache.mahout.test.DistributedMahoutSuite
-
-/**
- * Shared clustering test cases, to be mixed into a concrete `FunSuite`
- * (see the self-type) on top of a distributed Mahout test context.
- */
-trait ClusteringSuiteBase extends DistributedMahoutSuite with Matchers {
-
-  this: FunSuite =>
-
-  test("canopy test") {
-    val drmA = drmParallelize(dense(
-      (1.0, 1.2, 1.3, 1.4),
-      (1.1, 1.5, 2.5, 1.0),
-      (6.0, 5.2, -5.2, 5.3),
-      (7.0, 6.0, 5.0, 5.0),
-      (10.0, 1.0, 20.0, -10.0)))
-
-    import org.apache.mahout.math.algorithms.clustering.CanopyClustering
-
-    val model = new CanopyClustering().fit(drmA, 't1 -> 6.5, 't2 -> 5.5, 'distanceMeasure -> 'Chebyshev)
-    val myAnswer = model.cluster(drmA).collect
-
-    val correctAnswer = dense((0.0), (0.0), (1.0), (0.0), (2.0))
-
-    val epsilon = 1E-6
-    // Compare the absolute difference: the signed form passed whenever
-    // myAnswer.norm was smaller than correctAnswer.norm, however far off.
-    // NOTE(review): this still compares norms only, so two different cluster
-    // assignments with equal norm are indistinguishable; consider asserting on
-    // (myAnswer - correctAnswer).norm once the expected layout is confirmed.
-    (myAnswer.norm - correctAnswer.norm).abs should be <= epsilon
-  }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
deleted file mode 100644
index ffe1d1b..0000000
--- a/math-scala/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuiteBase.scala
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.
The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.mahout.math.algorithms
-
-import org.apache.mahout.math.algorithms.preprocessing._
-import org.apache.mahout.math.drm.drmParallelize
-import org.apache.mahout.math.scalabindings.{dense, sparse, svec}
-import org.apache.mahout.math.scalabindings.RLikeOps._
-import org.apache.mahout.test.DistributedMahoutSuite
-import org.scalatest.{FunSuite, Matchers}
-
-/**
- * Shared test cases for the preprocessing models (AsFactor, StandardScaler,
- * MeanCenter), to be mixed into a concrete `FunSuite` (see the self-type).
- *
- * Each test fits a model on a small 3-row matrix, transforms the same matrix
- * and compares the norm of the result with the norm of a hand-written answer.
- * NOTE(review): comparing norms only cannot detect results that differ
- * element-wise but have equal norm; consider (myAnswer - correctAnswer).norm.
- */
-trait PreprocessorSuiteBase extends DistributedMahoutSuite with Matchers {
-
-  this: FunSuite =>
-
-  test("asfactor test") {
-    val A = drmParallelize(dense(
-      (3, 2, 1, 2),
-      (0, 0, 0, 0),
-      (1, 1, 1, 1)), numPartitions = 2)
-
-    // 0 -> 2, 3 -> 5, 6 -> 9
-    val factorizer: AsFactorModel = new AsFactor().fit(A)
-
-    val factoredA = factorizer.transform(A)
-
-    println(factoredA)
-    println(factorizer.factorMap)
-    val correctAnswer = sparse(
-      svec((3 → 1.0) :: (6 → 1.0) :: (8 → 1.0) :: (11 → 1.0) :: Nil, cardinality = 12),
-      svec((0 → 1.0) :: (4 → 1.0) :: (7 → 1.0) :: ( 9 → 1.0) :: Nil, cardinality = 12),
-      svec((1 → 1.0) :: (5 → 1.0) :: (8 → 1.0) :: (10 → 1.0) :: Nil, cardinality = 12)
-    )
-
-    val myAnswer = factoredA.collect
-
-    val epsilon = 1E-6
-    // Absolute difference: the signed form passed for any smaller norm.
-    // (The original repeated this assertion twice; once is enough.)
-    (myAnswer.norm - correctAnswer.norm).abs should be <= epsilon
-
-  }
-
-  test("standard scaler test") {
-    /**
-     * R Prototype
-     * x <- matrix( c(1,2,3,1,5,9,5,-15,-2), nrow=3)
-     * scale(x, scale= apply(x, 2, sd) * sqrt(2/3))
-     * # ^^ note: R uses degrees of freedom = 1 for standard deviation calculations.
-     * # we don't (and neither does sklearn)
-     * # the *sqrt((N-1)/N) 'undoes' the degrees of freedom = 1 (N = 3 here, hence sqrt(2/3))
-     */
-
-    val A = drmParallelize(dense(
-      (1, 1, 5),
-      (2, 5, -15),
-      (3, 9, -2)), numPartitions = 2)
-
-    val scaler: StandardScalerModel = new StandardScaler().fit(A)
-
-    val correctAnswer = dense(
-      (-1.224745, -1.224745, -1.224745),
-      (0.000000, 0.000000, 1.224745),
-      (1.224745, 1.224745, 0.000000))
-
-    val myAnswer = scaler.transform(A).collect
-    println(scaler.meanVec)
-    println(scaler.stdev)
-
-    val epsilon = 1E-6
-    // Absolute difference: the signed form passed for any smaller norm.
-    (myAnswer.norm - correctAnswer.norm).abs should be <= epsilon
-
-  }
-
-  test("mean center test") {
-    /**
-     * R Prototype
-     *
-     * x <- matrix( c(1.0,2.0,3.0,1.0,5.0,9.0,-2.0,2.0,0), nrow=3)
-     * centered.x <- scale(x, scale= FALSE)
-     * print(centered.x)
-     */
-
-    val A = drmParallelize(dense(
-      (1, 1, -2),
-      (2, 5, 2),
-      (3, 9, 0)), numPartitions = 2)
-
-    val scaler: MeanCenterModel = new MeanCenter().fit(A)
-
-    val myAnswer = scaler.transform(A).collect
-
-    val correctAnswer = dense(
-      (-1, -4, -2),
-      (0, 0, 2),
-      (1, 4, 0))
-
-    val epsilon = 1E-6
-    // Absolute difference: the signed form passed for any smaller norm.
-    (myAnswer.norm - correctAnswer.norm).abs should be <= epsilon
-  }
-}