spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mengxr <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-2352] [ML] Multilayer Perceptron
Date Wed, 29 Jul 2015 23:35:40 GMT
Github user mengxr commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7621#discussion_r35824436
  
    --- Diff: mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala ---
    @@ -0,0 +1,857 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.ml.ann
    +
    +import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV, Vector => BV,
axpy => brzAxpy,
    +sum => Bsum}
    +import breeze.numerics.{log => Blog, sigmoid => Bsigmoid}
    +import org.apache.spark.mllib.linalg.{Vector, Vectors}
    +import org.apache.spark.mllib.optimization._
    +import org.apache.spark.rdd.RDD
    +import org.apache.spark.util.random.XORShiftRandom
    +
    +/**
    + * Trait that holds Layer properties, that are needed to instantiate it.
    + * Acts as a factory for the corresponding [[LayerModel]]: a model can be created
    + * either from an existing weight vector or from a random seed.
    + *
    + */
    +private[ann] trait Layer extends Serializable {
    +  /**
    +   * Returns the instance of the layer based on the weights provided.
    +   * @param weights vector with layer weights
    +   * @param position position of this layer's weights in the vector
    +   * @return the layer model
    +   */
    +  def getInstance(weights: Vector, position: Int): LayerModel
    +
    +  /**
    +   * Returns the instance of the layer with randomly generated weights.
    +   * @param seed seed used for generating the weights
    +   * @return the layer model
    +   */
    +  def getInstance(seed: Long): LayerModel
    +}
    +
    +/**
    + * Trait that holds Layer weights (or parameters).
    + * Declares the functions needed for forward propagation and for computing
    + * the delta and the gradient during back propagation.
    + * Can return weights in Vector format.
    + *
    + * Note: an implementation may return an internal buffer that is overwritten by the
    + * next call to the same method (AffineLayerModel in this file does so), so callers
    + * should not assume a returned matrix or array stays unchanged across calls.
    + */
    +private[ann] trait LayerModel extends Serializable {
    +  /**
    +   * Number of weights held by this layer model
    +   */
    +  val size: Int
    +
    +  /**
    +   * Evaluates the data (processes the data through the layer)
    +   * @param data input data
    +   * @return processed data
    +   */
    +  def eval(data: BDM[Double]): BDM[Double]
    +
    +  /**
    +   * Computes the delta for back propagation
    +   * @param nextDelta delta of the next layer
    +   * @param input input data
    +   * @return delta to be passed to the previous layer
    +   */
    +  def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double]
    +
    +  /**
    +   * Computes the gradient
    +   * @param delta delta for this layer
    +   * @param input input data
    +   * @return gradient as a flat array
    +   */
    +  def grad(delta: BDM[Double], input: BDM[Double]): Array[Double]
    +
    +  /**
    +   * Returns weights for the layer in a single vector
    +   * @return layer weights
    +   */
    +  def weights(): Vector
    +}
    +
    +/**
    + * Properties of an affine transformation layer, i.e. y = A * x + b.
    + * Serves as the factory for [[AffineLayerModel]] instances.
    + * @param numIn number of inputs
    + * @param numOut number of outputs
    + */
    +private[ann] class AffineLayer(val numIn: Int, val numOut: Int) extends Layer {
    +
    +  /** Builds the model from the weights stored in `weights` starting at `position`. */
    +  override def getInstance(weights: Vector, position: Int): LayerModel =
    +    AffineLayerModel.apply(this, weights, position)
    +
    +  /** Builds the model with weights generated randomly from `seed`. */
    +  override def getInstance(seed: Long = 11L): LayerModel =
    +    AffineLayerModel.apply(this, seed)
    +}
    +
    +/**
    + * Model of Affine layer y=A*x+b
    + *
    + * The model keeps internal work buffers (`z`, `d`, `ones`, `gwb`) that are allocated on
    + * first use and re-allocated only when the number of columns of the incoming matrix
    + * changes. `eval`, `prevDelta` and `grad` return these buffers directly, so a returned
    + * value is overwritten by the next call to the same method.
    + * NOTE(review): because of this mutable state an instance does not look safe to share
    + * between threads -- confirm against callers.
    + * NOTE(review): `BreezeUtil.dgemm(alpha, A, B, beta, C)` and `BreezeUtil.dgemv` are
    + * defined outside this view; the comments below presume BLAS semantics
    + * (C := alpha * A * B + beta * C) -- confirm.
    + * @param w weights (matrix A)
    + * @param b bias (vector b)
    + */
    +private[ann] class AffineLayerModel private(w: BDM[Double], b: BDV[Double]) extends LayerModel
{
    +  val size = w.size + b.length // total number of weights: entries of A plus entries of b
    +  val gwb = new Array[Double](size) // flat gradient buffer returned by grad()
    +  private lazy val gw: BDM[Double] = new BDM[Double](w.rows, w.cols, gwb) // matrix backed by gwb
    +  private lazy val gb: BDV[Double] = new BDV[Double](gwb, w.size) // vector backed by gwb at offset w.size; TODO confirm length semantics of this constructor
    +  private var z: BDM[Double] = null // output buffer of eval(), created lazily
    +  private var d: BDM[Double] = null // output buffer of prevDelta(), created lazily
    +  private var ones: BDV[Double] = null // vector of ones used by grad(), created lazily
    +
    +  override def eval(data: BDM[Double]): BDM[Double] = {
    +    if (z == null || z.cols != data.cols) z = new BDM[Double](w.rows, data.cols) // (re)allocate when the column count changes
    +    z(::, *) := b // assign b to every column of z
    +    BreezeUtil.dgemm(1.0, w, data, 1.0, z) // presumably z := w * data + z
    +    z
    +  }
    +
    +  override def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double] = {
    +    if (d == null || d.cols != nextDelta.cols) d = new BDM[Double](w.cols, nextDelta.cols) // (re)allocate when the column count changes
    +    BreezeUtil.dgemm(1.0, w.t, nextDelta, 0.0, d) // presumably d := w.t * nextDelta; `input` is not used here
    +    d
    +  }
    +
    +  override def grad(delta: BDM[Double], input: BDM[Double]): Array[Double] = {
    +    BreezeUtil.dgemm(1.0 / input.cols, delta, input.t, 0.0, gw) // presumably gw := delta * input.t / input.cols
    +    if (ones == null || ones.length != delta.cols) ones = BDV.ones[Double](delta.cols) // (re)allocate when the column count changes
    +    BreezeUtil.dgemv(1.0 / input.cols, delta, ones, 0.0, gb) // presumably gb := delta * ones / input.cols
    +    gwb // gw and gb are both constructed over this array
    +  }
    +
    +  override def weights(): Vector = AffineLayerModel.roll(w, b) // flattens A followed by b into one vector
    +}
    +
    +/**
    + * Factory for Affine layer models
    + */
    +private[ann] object AffineLayerModel {
    +
    +  /**
    +   * Creates a model of Affine layer from weights stored in a vector
    +   * @param layer layer properties
    +   * @param weights vector with weights
    +   * @param position position of weights in the vector
    +   * @return model of Affine layer
    +   */
    +  def apply(layer: AffineLayer, weights: Vector, position: Int): AffineLayerModel = {
    +    val (w, b) = unroll(weights, position, layer.numIn, layer.numOut)
    +    new AffineLayerModel(w, b)
    +  }
    +
    +  /**
    +   * Creates a model of Affine layer with randomly generated weights
    +   * @param layer layer properties
    +   * @param seed seed
    +   * @return model of Affine layer
    +   */
    +  def apply(layer: AffineLayer, seed: Long): AffineLayerModel = {
    +    val (w, b) = randomWeights(layer.numIn, layer.numOut, seed)
    +    new AffineLayerModel(w, b)
    +  }
    +
    +  /**
    +   * Unrolls the weights from the vector.
    +   * The layer occupies `numOut * numIn` values for matrix A followed by `numOut`
    +   * values for vector b, starting at `position`.
    +   * @param weights vector with weights
    +   * @param position position of weights for this layer
    +   * @param numIn number of layer inputs
    +   * @param numOut number of layer outputs
    +   * @return matrix A and vector b
    +   * @throws IllegalArgumentException if the vector does not hold enough values
    +   *                                  for this layer starting at `position`
    +   */
    +  def unroll(weights: Vector, position: Int,
    +             numIn: Int, numOut: Int): (BDM[Double], BDV[Double]) = {
    +    // computed as Long so that a large layer cannot overflow the bounds check
    +    val numMatrixWeights = numOut.toLong * numIn
    +    val end = position.toLong + numMatrixWeights + numOut
    +    require(position >= 0 && numIn >= 0 && numOut >= 0 && end <= weights.size,
    +      s"Affine layer ($numIn inputs, $numOut outputs) needs weights in [$position, $end) " +
    +        s"but the vector has only ${weights.size} elements")
    +    val weightsArray = weights.toArray
    +    // TODO: the array is not copied to BDMs (both results are backed by weightsArray),
    +    // make sure this is OK!
    +    val a = new BDM[Double](numOut, numIn, weightsArray, position)
    +    val b = new BDV[Double](weightsArray, position + numOut * numIn, 1, numOut)
    +    (a, b)
    +  }
    +
    +  /**
    +   * Roll the layer weights into a vector: the values of A followed by the values of b
    +   * @param a matrix A
    +   * @param b vector b
    +   * @return vector of weights
    +   */
    +  def roll(a: BDM[Double], b: BDV[Double]): Vector = {
    +    val result = new Array[Double](a.size + b.length)
    +    // TODO: make sure that we need to copy!
    +    System.arraycopy(a.toArray, 0, result, 0, a.size)
    +    System.arraycopy(b.toArray, 0, result, a.size, b.length)
    +    Vectors.dense(result)
    +  }
    +
    +  /**
    +   * Generate random weights for the layer.
    +   * Every value is drawn as `(u * 4.8 - 2.4) / numIn` with `u` taken from
    +   * `rand.nextDouble`; the matrix is filled before the bias.
    +   * @param numIn number of inputs
    +   * @param numOut number of outputs
    +   * @param seed seed
    +   * @return (matrix A, vector b)
    +   */
    +  def randomWeights(numIn: Int, numOut: Int, seed: Long = 11L): (BDM[Double], BDV[Double]) = {
    +    val rand: XORShiftRandom = new XORShiftRandom(seed)
    +    val weights = BDM.fill[Double](numOut, numIn){ (rand.nextDouble * 4.8 - 2.4) / numIn }
    +    val bias = BDV.fill[Double](numOut){ (rand.nextDouble * 4.8 - 2.4) / numIn }
    +    (weights, bias)
    +  }
    +}
    +
    +/**
    + * Trait for functions and their derivatives for functional layers.
    + * All methods write their element-wise results into a caller-supplied matrix.
    + */
    +private[ann] trait ActivationFunction extends Serializable {
    +
    +  /**
    +   * Implements a function
    +   * @param x input data
    +   * @param y output data (written by this method)
    +   */
    +  def eval(x: BDM[Double], y: BDM[Double]): Unit
    +
    +  /**
    +   * Implements a derivative of a function (needed for the back propagation)
    +   * NOTE(review): the implementations visible in this file apply z => (1 - z) * z to `x`,
    +   * which suggests `x` is expected to hold the function's output -- confirm.
    +   * @param x input data
    +   * @param y output data (written by this method)
    +   */
    +  def derivative(x: BDM[Double], y: BDM[Double]): Unit
    +
    +  /**
    +   * Implements a cross entropy error of a function.
    +   * Needed if the functional layer that contains this function is the output layer
    +   * of the network.
    +   * NOTE(review): this trait declares the parameters as (target, output, result) while the
    +   * implementations in this file declare them as (output, target, result); positional
    +   * callers are unaffected, but the intended order should be confirmed and made consistent.
    +   * @param target target output
    +   * @param output computed output
    +   * @param result intermediate result (written by this method)
    +   * @return cross-entropy
    +   */
    +  def crossEntropy(target: BDM[Double], output: BDM[Double], result: BDM[Double]): Double
    +
    +  /**
    +   * Implements a mean squared error of a function
    +   * NOTE(review): as with crossEntropy, the implementations in this file name the first
    +   * two parameters (output, target) -- confirm the intended order.
    +   * @param target target output
    +   * @param output computed output
    +   * @param result intermediate result (written by this method)
    +   * @return mean squared error
    +   */
    +  def squared(target: BDM[Double], output: BDM[Double], result: BDM[Double]): Double
    +}
    +
    +/**
    + * Implements element-wise application of functions to matrices.
    + * The output matrix may be the same object as an input matrix (in-place application).
    + *
    + * Matrices are traversed column by column, which matches the default (column-major)
    + * storage of Breeze dense matrices. Elements are processed independently, so `func`
    + * is expected to be free of side effects.
    + */
    +private[ann] object ActivationFunction {
    +
    +  /**
    +   * Sets y(i, j) = func(x(i, j)) for every element of x
    +   * @param x input data
    +   * @param y output data, must have at least the dimensions of x
    +   * @param func function applied to every element
    +   */
    +  def apply(x: BDM[Double], y: BDM[Double], func: Double => Double): Unit = {
    +    val numRows = x.rows
    +    val numCols = x.cols
    +    var j = 0
    +    while (j < numCols) {
    +      var i = 0
    +      while (i < numRows) {
    +        y(i, j) = func(x(i, j))
    +        i += 1
    +      }
    +      j += 1
    +    }
    +  }
    +
    +  /**
    +   * Sets y(i, j) = func(x1(i, j), x2(i, j)) for every element of x1
    +   * @param x1 first input, defines the range of traversed elements
    +   * @param x2 second input, must have at least the dimensions of x1
    +   * @param y output data, must have at least the dimensions of x1
    +   * @param func function applied to every pair of elements
    +   */
    +  def apply(x1: BDM[Double], x2: BDM[Double], y: BDM[Double],
    +            func: (Double, Double) => Double): Unit = {
    +    val numRows = x1.rows
    +    val numCols = x1.cols
    +    var j = 0
    +    while (j < numCols) {
    +      var i = 0
    +      while (i < numRows) {
    +        y(i, j) = func(x1(i, j), x2(i, j))
    +        i += 1
    +      }
    +      j += 1
    +    }
    +  }
    +
    +}
    +
    +/**
    + * SoftMax activation function, applied independently to every column of the input
    + */
    +private[ann] class SoftmaxFunction extends ActivationFunction {
    +
    +  /**
    +   * Writes the softmax of each column of x into the same column of y.
    +   * The column maximum is subtracted before exponentiation so that the
    +   * exponent stays computable.
    +   */
    +  override def eval(x: BDM[Double], y: BDM[Double]): Unit = {
    +    val numRows = x.rows
    +    val numCols = x.cols
    +    var col = 0
    +    while (col < numCols) {
    +      // pass 1: largest value of the column
    +      var colMax = Double.MinValue
    +      var row = 0
    +      while (row < numRows) {
    +        val value = x(row, col)
    +        if (value > colMax) colMax = value
    +        row += 1
    +      }
    +      // pass 2: shifted exponentials and their sum
    +      var expSum = 0.0
    +      row = 0
    +      while (row < numRows) {
    +        val shiftedExp = Math.exp(x(row, col) - colMax)
    +        y(row, col) = shiftedExp
    +        expSum += shiftedExp
    +        row += 1
    +      }
    +      // pass 3: normalization
    +      row = 0
    +      while (row < numRows) {
    +        y(row, col) /= expSum
    +        row += 1
    +      }
    +      col += 1
    +    }
    +  }
    +
    +  /**
    +   * Stores (output - target) in result and returns
    +   * -sum(target :* log(output)) divided by the number of columns of output.
    +   */
    +  override def crossEntropy(output: BDM[Double], target: BDM[Double],
    +                            result: BDM[Double]): Double = {
    +    ActivationFunction(output, target, result, (o: Double, t: Double) => o - t)
    +    val weightedLog = target :* Blog(output)
    +    -Bsum(weightedLog) / output.cols
    +  }
    +
    +  /** Writes (1 - z) * z into y for every element z of x. */
    +  override def derivative(x: BDM[Double], y: BDM[Double]): Unit = {
    +    ActivationFunction(x, y, (z: Double) => (1 - z) * z)
    +  }
    +
    +  /** Not supported for SoftMax; always throws UnsupportedOperationException. */
    +  override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = {
    +    throw new UnsupportedOperationException("Sorry, squared error is not defined for SoftMax.")
    +  }
    +}
    +
    +/**
    + * Sigmoid activation function, applied element-wise
    + */
    +private[ann] class SigmoidFunction extends ActivationFunction {
    +
    +  /** Writes sigmoid(z) into y for every element z of x. */
    +  override def eval(x: BDM[Double], y: BDM[Double]): Unit = {
    +    ActivationFunction(x, y, (z: Double) => Bsigmoid(z))
    +  }
    +
    +  /**
    +   * Stores (output - target) in result and returns
    +   * -sum(target :* log(output)) divided by the number of columns of output.
    +   */
    +  override def crossEntropy(output: BDM[Double], target: BDM[Double],
    +                            result: BDM[Double]): Double = {
    +    ActivationFunction(output, target, result, (o: Double, t: Double) => o - t)
    +    val weightedLog = target :* Blog(output)
    +    -Bsum(weightedLog) / output.cols
    +  }
    +
    +  /** Writes (1 - z) * z into y for every element z of x. */
    +  override def derivative(x: BDM[Double], y: BDM[Double]): Unit = {
    +    ActivationFunction(x, y, (z: Double) => (1 - z) * z)
    +  }
    +
    +  /**
    +   * Returns sum((output - target)^2) / 2 divided by the number of columns of output.
    +   * On return, result holds (output - target) * (output - output * output) element-wise.
    +   */
    +  override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = {
    +    // step 1: result := output - target
    +    ActivationFunction(output, target, result, (o: Double, t: Double) => (o - t))
    +    // step 2: the error is computed from the plain difference, before it is rescaled
    +    val error = Bsum(result :* result) / 2 / output.cols
    +    // step 3: result := result * (output - output * output), in place
    +    ActivationFunction(result, output, result, (diff: Double, o: Double) => diff * (o - o * o))
    +    error
    +  }
    +}
    +
    +/**
    + * Properties of a functional layer, y = f(x)
    + * @param activationFunction activation function f
    + */
    +private[ann] class FunctionalLayer(val activationFunction: ActivationFunction) extends Layer {
    +
    +  /** Both arguments are ignored; delegates to the seed-based overload with seed 0. */
    +  override def getInstance(weights: Vector, position: Int): LayerModel = {
    +    getInstance(0L)
    +  }
    +
    +  /** The seed is ignored; the model is built from this layer alone. */
    +  override def getInstance(seed: Long): LayerModel = {
    +    FunctionalLayerModel(this)
    +  }
    +}
    +
    +/**
    + * Functional layer model. Holds no weights.
    + * @param activationFunction activation function
    + */
    +private[ann] class FunctionalLayerModel private (val activationFunction: ActivationFunction
    +                                     ) extends LayerModel {
    --- End diff --
    
    Please fix the indentation of this class declaration.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message