singa-commits mailing list archives

From zhaoj...@apache.org
Subject [43/50] [abbrv] incubator-singa git commit: SINGA-184 Add Cross Entropy loss computation
Date Mon, 13 Jun 2016 13:20:36 GMT
SINGA-184 Add Cross Entropy loss computation

Update the softmax cross entropy layer to support both cpp and cuda devices;

Fix bugs in the cross entropy forward and backward passes; the cuda version of exp() is needed;
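
For reference, the quantities computed below are the standard softmax cross entropy identities (spelled out here for clarity; the commit itself does not state them):

\[
p = \mathrm{softmax}(x), \qquad
\ell(x, y) = -\log p_{y}, \qquad
\frac{\partial \ell}{\partial x_{j}} = p_{j} - \mathbb{1}[j = y]
\]

where y is the ground-truth category index. Clamping p_y at FLT_MIN, as the kernels below do, keeps the log finite when the predicted probability underflows.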


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ec17acab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ec17acab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ec17acab

Branch: refs/heads/master
Commit: ec17acab49d595fdc48b2dae6f71901b5a4c8191
Parents: efd7b62
Author: Wei Wang <wangwei@comp.nus.edu.sg>
Authored: Fri May 27 17:25:01 2016 +0800
Committer: Wei Wang <wangwei@comp.nus.edu.sg>
Committed: Mon Jun 13 11:12:05 2016 +0800

----------------------------------------------------------------------
 include/singa/core/tensor.h             |  17 +++--
 include/singa/model/loss.h              |  47 ++++++++++++
 src/CMakeLists.txt                      |   3 +-
 src/core/tensor/math_kernel.cu          |  37 +++++++++-
 src/core/tensor/math_kernel.h           |   9 ++-
 src/core/tensor/tensor.cc               |  52 +++++++++----
 src/core/tensor/tensor_math.h           |  24 ++++--
 src/core/tensor/tensor_math_cpp.h       |  50 +++++++++++--
 src/core/tensor/tensor_math_cuda.h      |  41 ++++++++---
 src/model/layer/softmax.cc              |   7 +-
 src/model/loss/cross_entropy.h          | 105 ---------------------------
 src/model/loss/mse.cc                   |  41 +++++++++++
 src/model/loss/mse.h                    |  66 -----------------
 src/model/loss/softmax_cross_entropy.cc |  53 ++++++++++++++
 test/singa/test_cross_entropy.cc        |  64 ++++++++++++++--
 test/singa/test_mse.cc                  |   6 +-
 test/singa/test_softmax.cc              |   9 +--
 17 files changed, 393 insertions(+), 238 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index bb8d7f8..865e1e4 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -239,11 +239,10 @@ Tensor Sum(const Tensor &t, int axis);
 /// if 'axis' is 1, average all columns into a single column
 /// TODO(wangwei) support arbitrary Tensor like numpy.average
 Tensor Average(const Tensor &t, int axis);
-/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows,
-/// and shape_[axis]*...*shape_[nDim()] columns.
-/// and do softmax along each row.
-Tensor SoftMax(const Tensor &t, int axis = 0);
-void SoftMax(const Tensor &t, int axis, Tensor *ret);
+/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
+Tensor SoftMax(const Tensor &in);
+/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
+void SoftMax(const Tensor &in, Tensor *out);
 
 /// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows,
 /// and shape_[axis+1]*...*shape_[nDim()] columns.
@@ -398,6 +397,14 @@ Tensor DivRow(const Tensor &lhs, const Tensor &rhs);
 void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
 */
 
+/// Compute the cross entropy loss given the prediction probability 'p' and
+/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vectors
+/// or 2-d matrices. The loss values (one per sample) are written into 'p'
+/// in place.
+void ComputeCrossEntropy(const Tensor& t, Tensor* p);
+/// Compute dx given the prediction probability 'p' (p = softmax(x)) and the
+/// target (ground truth) labels 't'. 'p' and 't' are either 1-d vectors or
+/// 2-d matrices. dx has the same shape as 'p' and is computed into 'p' in
+/// place.
+void SoftmaxCrossEntropyBwd(const Tensor& t, Tensor* p);
 }  // namespace singa
 
 #endif  // SINGA_CORE_TENSOR_H_
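
A minimal usage sketch of the two calls declared above, assuming 'prob' is a 2-d probability Tensor (batchsize x dim) and 'labels' holds the ground-truth indices (both are hypothetical tensors set up elsewhere). Both functions overwrite their 'p' argument in place, which is why the loss implementation later in this patch clones the probabilities first:

// Sketch only; 'prob' and 'labels' are assumed to exist on some device.
singa::Tensor loss = prob.Clone();
singa::ComputeCrossEntropy(labels, &loss);     // loss[i] = -log(prob[i][truth_i])

singa::Tensor grad = prob.Clone();
singa::SoftmaxCrossEntropyBwd(labels, &grad);  // grad[i][truth_i] = prob[i][truth_i] - 1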

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/model/loss.h
----------------------------------------------------------------------
diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h
index 6a23067..d188de0 100644
--- a/include/singa/model/loss.h
+++ b/include/singa/model/loss.h
@@ -18,6 +18,7 @@
 
 #ifndef SINGA_MODEL_LOSS_H_
 #define SINGA_MODEL_LOSS_H_
+#include <stack>
 #include "singa/proto/model.pb.h"
 #include "singa/core/tensor.h"
 namespace singa {
@@ -54,6 +55,52 @@ class Loss {
   /// Compute the gradients of the loss values w.r.t. the prediction.
   virtual Tensor Backward() = 0;
 };
+
+
+
+// ============= Mean Squared Error ===========================================
+/// MSE is for mean squared error or squared Euclidean distance.
+class MSE : public Loss<Tensor> {
+ public:
+  /// Compute the loss values for each sample/instance given the prediction
+  /// and the target, which is 0.5 * ||prediction-target||^2.
+  /// Users can call Average(const Tensor&) to get the average
+  /// loss value over all samples in the batch.
+  Tensor Forward(const Tensor& prediction, const Tensor& target) override;
+
+  /// Compute the gradients of the loss values w.r.t. the prediction,
+  /// which is (prediction-target)/batchsize
+  Tensor Backward() override;
+
+ private:
+  // to buffer intermediate data, i.e., prediction-target
+  std::stack<Tensor> buf_;
+};
+
+
+// =============== Softmax Cross Entropy ======================================
+/// Softmax + cross entropy for multi-category classification
+class SoftmaxCrossEntropy : public Loss<Tensor> {
+ public:
+  /// Compute the loss values for each sample/instance given the prediction
+  /// and the target, which is -log(p[idx_truth]), idx_truth is the truth
+  /// category's index and p[] is the probability for each category, computed
+  /// from Softmax(prediction).
+  /// Users can call Average(const Tensor&) to get the average
+  /// loss value over all samples in the batch.
+  Tensor Forward(const Tensor& prediction, const Tensor& target) override;
+
+  /// Compute the gradients of the loss values w.r.t. the prediction,
+  /// which is: p[idx] - 1 if idx is the truth category's index; else,
+  /// p[idx]
+  Tensor Backward() override;
+
+ private:
+  // to buffer intermediate data, i.e., probability for each category and
+  // the target (ground truth)
+  std::stack<Tensor> buf_;
+};
+
 }  // namespace singa
 
 #endif  // SINGA_MODEL_LOSS_H_
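
The calling protocol shared by both subclasses, sketched with the shapes the unit tests in this patch use (batch of 2 samples, 4 categories); Forward buffers its intermediates on buf_ and Backward pops them, so the pattern is Forward (or Evaluate) followed by Backward:

singa::SoftmaxCrossEntropy loss_fn;
singa::Tensor p(singa::Shape{2, 4});  // raw predictions (pre-softmax)
singa::Tensor t(singa::Shape{2, 1});  // ground-truth category indices
// ... fill p and t, e.g. via CopyDataFromHostPtr ...
singa::Tensor loss = loss_fn.Forward(p, t);  // per-sample -log(prob_of_truth)
singa::Tensor grad = loss_fn.Backward();     // gradient w.r.t. p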

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 28066de..23cae85 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,7 +21,7 @@ AUX_SOURCE_DIRECTORY(core/tensor core_source)
 FILE(GLOB_RECURSE cuda_source core "*.cu")
 set(FLAGS_BACKUP ${CMAKE_CXX_FLAGS})
 set(CMAKE_CXX_FLAGS "")
-CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC")
+CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC ")
 #message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}")
 #message(STATUS "CORE ${cuda_source}")
 #message(STATUS "OBJ ${cuda_objs}")
@@ -36,6 +36,7 @@ LIST(APPEND SINGA_LINKER_LIBS singa_core)
 AUX_SOURCE_DIRECTORY(model model_source)
 AUX_SOURCE_DIRECTORY(model/layer model_source)
 AUX_SOURCE_DIRECTORY(model/optimizer model_source)
+AUX_SOURCE_DIRECTORY(model/loss model_source)
 #MESSAGE(STATUS "MODEL ${model_source}")
 ADD_LIBRARY(singa_model SHARED ${model_source})
 TARGET_LINK_LIBRARIES(singa_model ${SINGA_LINKER_LIBS})

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index aed6add..f12763e 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -485,8 +485,26 @@ __global__ void KernelSet(const size_t num, const float x, float *out) {
   }
 }
 
-void Set(const size_t num, const float x, float *out, cudaStream_t s) {
-  KernelSet << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, x, out);
+__global__
+void KernelComputeCrossEntropy(const size_t batchsize, const size_t dim, const float* p,
+    const int* t, float* loss) {
+  size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
+  size_t num_threads = blockDim.x * gridDim.x;
+  for (; sample < batchsize; sample += num_threads) {
+    float prob_of_truth = p[sample * dim + t[sample]];
+    loss[sample] = -log(max(prob_of_truth, FLT_MIN));
+  }
+}
+
+__global__
+void KernelSoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim, const float* p,
+    const int* t, float* grad) {
+  size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
+  size_t num_threads = blockDim.x * gridDim.x;
+  for (; sample < batchsize; sample += num_threads) {
+    size_t pos = sample * dim + t[sample];
+    grad[pos] = p[pos] - 1.0f;  // TODO(wangwei) handle the case where p and grad differ
+  }
 }
 void Div(const size_t num, float alpha, const float *in, float *out,
          cudaStream_t s) {
@@ -510,6 +528,21 @@ void LE(const size_t num, const float *in, const float x, float *out,
   KernelLE << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
 }
 
+void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float* p,
+    const int *t, float *loss, cudaStream_t stream) {
+  KernelComputeCrossEntropy<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize,
+      dim, p, t, loss);
+}
+
+void Set(const size_t num, const float x, float *out, cudaStream_t s) {
+  KernelSet<<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>>(num, x, out);
+}
+
+void SoftmaxCrossEntropyBwd(size_t batchsize, const size_t dim, const float* p,
+    const int *t, float *grad, cudaStream_t stream) {
+  KernelSoftmaxCrossEntropyBwd<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize,
+      dim, p, t, grad);
+}
 }  // namespace cuda
 }  // namespace singa
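
Both kernels above use a grid-stride loop, so the fixed launch of ceil(batchsize / CU1DBLOCKF) blocks covers any batch size. The idiom in isolation (an illustrative kernel, not part of the patch):

__global__ void KernelExample(const size_t n, float *out) {
  // Each thread starts at its global index and strides by the total
  // number of launched threads until the whole range is covered.
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    out[i] = 0.0f;  // per-element work goes here
  }
}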
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index 5c906a9..09953e4 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -83,13 +83,20 @@ void set_value(int n, float v, float *out);
 void threshold(int n, float alpha, const float *in, float *out);
 
 // follow the consistency guide for math API
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+                         const float *p, const int *t, float *loss,
+                         cudaStream_t stream);
 void Div(const size_t num, const float x, const float *in, float *out,
          cudaStream_t s);
-void Set(const size_t num, const float x, float *out, cudaStream_t s);
 void GT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
 void GE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
 void LT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
 void LE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
+void Set(const size_t num, const float x, float *out, cudaStream_t s);
+void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
+                            const float *p, const int *t, float *grad,
+                            cudaStream_t stream);
+
 }  // cuda
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 5ae375c..1ac25c6 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -77,10 +77,9 @@ void Tensor::ResetLike(const Tensor &t) {
   }
 }
 
-void Tensor::Reshape(const Shape &shape) {
-  if (Product(shape_) != Product(shape)) {
-    if (blob_ != nullptr && blob_->DecRefCount() == 0)
-      device_->FreeBlob(blob_);
+void Tensor::Reshape(const Shape& shape) {
+  if (Product(shape) != Product(shape_)) {
+    if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
     blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_));
   }
   shape_ = shape;
@@ -403,22 +402,21 @@ Tensor Average(const Tensor &t, int axis) {
   }
 }
 
-Tensor SoftMax(const Tensor &in, int axis) {
+Tensor SoftMax(const Tensor &in) {
   Tensor out(in.shape(), in.device(), in.data_type());
-  SoftMax(in, axis, &out);
+  SoftMax(in, &out);
   return out;
 }
 
-void SoftMax(const Tensor &in, int axis, Tensor *out) {
+void SoftMax(const Tensor &in, Tensor *out) {
+  CHECK_LE(in.nDim(), 2u);
+  Exp(in, out);
   size_t nrow = 1, ncol = in.Size(), size = ncol;
-  CHECK_GE(axis, 0);
-  if (axis > 0) {
-    nrow = Product(in.shape(), 0, axis);
-    CHECK_EQ(size % nrow, 0u) << "Size = " << size << " nrow = " << nrow;
+  if (in.nDim() == 2u) {
+    nrow = in.shape(0);
     ncol = size / nrow;
+    out->Reshape(Shape{nrow, ncol});
   }
-  Exp(in, out);
-  out->Reshape(Shape{nrow, ncol});
   Tensor sum(Shape{nrow}, in.device(), in.data_type());
   SumColumns(*out, &sum);
   DivColumn(sum, out);
@@ -594,6 +592,19 @@ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) {
     Mult(alpha, one, vmat, beta, M);
   }
 }
+void ComputeCrossEntropy(const Tensor& t, Tensor* p) {
+  CHECK_LE(p->nDim(), 2u);
+  CHECK_LE(t.nDim(), 2u);  // TODO(wangwei) consider multi-labels.
+  size_t batchsize = 1;
+  if (p->nDim() == 2u) batchsize = p->shape(0);
+  size_t dim = p->Size() / batchsize;
+  TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
+    p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
+      ComputeCrossEntropy<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
+                                       p->blob(), ctx);
+    }, {p->blob(), t.blob()}, {p->blob()});
+  });
+}
 
 template <typename SType> Tensor Div(const SType alpha, const Tensor &in) {
   Tensor out(in.shape(), in.device(), in.data_type());
@@ -665,7 +676,20 @@ void MultRow(const Tensor &v, Tensor *M) {
         {M->blob(), v.blob()}, {M->blob()});
   });
 }
-
+void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) {
+  CHECK_LE(p->nDim(), 2u);
+  CHECK_LE(t.nDim(), 2u);  // TODO(wangwei) consider multi-labels.
+  size_t batchsize = 1;
+  if (p->nDim() == 2u)
+    batchsize = p->shape(0);
+  size_t dim = p->Size() / batchsize;
+  TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
+    p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
+      SoftmaxCrossEntropyBwd<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
+                                          p->blob(), ctx);
+    }, {p->blob(), t.blob()}, {p->blob()});
+  });
+}
 void SubColumn(const Tensor &v, Tensor *M) { AddColumn(-1, 1, v, M); }
 
 void SubRow(const Tensor &v, Tensor *M) { AddRow(-1, 1, v, M); }
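
The rewritten SoftMax(in, out) above is equivalent to this host-side reference loop (a sketch only; the real code dispatches Exp, SumColumns and DivColumn to whichever device holds the Tensor, and, like the patch, it does not subtract the row max before exponentiating):

#include <cmath>
#include <vector>

std::vector<float> RowSoftmax(const std::vector<float> &in, size_t nrow,
                              size_t ncol) {
  std::vector<float> out(in.size());
  for (size_t r = 0; r < nrow; r++) {
    float sum = 0.f;  // per-row normalizer: sum of exp over all columns
    for (size_t c = 0; c < ncol; c++) sum += std::exp(in[r * ncol + c]);
    for (size_t c = 0; c < ncol; c++)
      out[r * ncol + c] = std::exp(in[r * ncol + c]) / sum;
  }
  return out;
}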

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index ff865e0..bcf4908 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -110,12 +110,6 @@ void Sigmoid(int count, const Blob *input, Blob *ret, Context *ctx) {
   LOG(FATAL) << "Not Implemented";
 }
 
-/// Do softmax for each row invidually
-template <typename DType, typename Lang>
-void Softmax(int nrow, int ncol, const Blob *input, Blob *ret, Context *ctx) {
-  LOG(FATAL) << "Not Implemented";
-}
-
 // TODO(wangwei) unify SumRow and SumCol.
 /// Sum the rows of the input matrix into a vector
 template <typename DType, typename Lang>
@@ -312,11 +306,14 @@ void Gaussian(int count, float mean, float std, Blob *ret, Context *ctx) {
 
 // ========follow the consistency guide of math API
 
+/// Compute the cross entropy loss for each sample, given the probability 'p'
+/// and the target 't'; 'loss' holds one value per sample.
 template <typename DType, typename Lang>
-void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+                         const Blob *p, const Blob *t, Blob *loss,
+                         Context *ctx) {
   LOG(FATAL) << "Not Implemented";
 }
-/// Divide alpha by each element of 'in'.
 template <typename DType, typename Lang>
 void Div(const size_t num, const DType alpha, const Blob *in, Blob *out,
          Context *ctx) {
@@ -364,6 +361,17 @@ void GE(const size_t num, const Blob *in, const DType x, Blob *out,
         Context *ctx) {
   LOG(FATAL) << "Not Implemented";
 }
+template <typename DType, typename Lang>
+void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+  LOG(FATAL) << "Not Implemented";
+}
+
+template <typename DType, typename Lang>
+void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
+                            const Blob *p, const Blob *t, Blob *grad,
+                            Context *ctx) {
+  LOG(FATAL) << "Not Implemented";
+}
 
 }  // namespace singa
 #endif  // SINGA_CORE_MATH_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index 693f09c..907c656 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -17,7 +17,9 @@
  */
 #ifndef SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
 #define SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
+
 #include "./tensor_math.h"
+#include <cfloat>
 #include "singa/core/common.h"
 #include <math.h>
 
@@ -210,6 +212,22 @@ void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret,
 
 // follow the consistency guide of math API
 template <>
+void ComputeCrossEntropy<float, lang::Cpp>(const size_t batchsize,
+                                           const size_t dim, const Blob *p,
+                                           const Blob *t, Blob *loss,
+                                           Context *ctx) {
+  const float *pPtr = static_cast<const float *>(p->data());
+  const float *tPtr = static_cast<const float *>(t->data());
+  float *lossPtr = static_cast<float *>(loss->mutable_data());
+  for (size_t i = 0; i < batchsize; i++) {
+    int truth_idx = static_cast<int>(tPtr[i]);
+    CHECK_GE(truth_idx, 0);
+    float prob_of_truth = pPtr[i * dim + truth_idx];
+    lossPtr[i] = -std::log(std::max(prob_of_truth, FLT_MIN));
+  }
+}
+
+template <>
 void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
                            Blob *out, Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -249,13 +267,6 @@ void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow,
     }
   }
 }
-
-template <>
-void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
-                           Context *ctx) {
-  float *outPtr = static_cast<float *>(out->mutable_data());
-  for (size_t i = 0; i < num; i++) outPtr[i] = x;
-}
 template <>
 void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
                           Blob *out, Context *ctx) {
@@ -312,9 +323,32 @@ void GEMM<float, lang::Cpp>(const bool transA, const bool transB,
   cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr,
               lda, BPtr, ldb, beta, CPtr, ldc);
 }
-
 #endif  // USE_CBLAS
 
+template <>
+void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+                           Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  for (size_t i = 0; i < num; i++) outPtr[i] = x;
+}
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize,
+                                              const size_t dim, const Blob *p,
+                                              const Blob *t,
+                                              Blob *grad, Context *ctx) {
+  CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+  // const float* pPtr = static_cast<const float*>(p->data());
+  const float *tPtr = static_cast<const float *>(t->data());
+  float *gradPtr = static_cast<float *>(grad->mutable_data());
+
+  for (size_t i = 0; i < batchsize; i++) {
+    int truth_idx = static_cast<int>(tPtr[i]);
+    CHECK_GE(truth_idx, 0);
+    gradPtr[i * dim + truth_idx] -= 1.0;
+  }
+}
+
+
 }  // namespace singa
 
 #endif  // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
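
A quick numeric check of the two specializations above, using the same values as test_cross_entropy.cc later in this patch: four equal logits give a softmax of 0.25 per category, so the loss is -log(0.25) and the in-place gradient at the truth index is 0.25 - 1:

#include <cassert>
#include <cmath>

int main() {
  const float prob = 0.25f;            // softmax of four equal logits
  const float loss = -std::log(prob);  // what ComputeCrossEntropy stores
  const float grad = prob - 1.0f;      // what SoftmaxCrossEntropyBwd stores
  assert(std::fabs(loss - 1.3862944f) < 1e-5f);
  assert(grad == -0.75f);
  return 0;
}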

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 4a2ba66..c69620c 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -75,6 +75,17 @@ void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret,
 
 // follow the consistency guide of math API
 template <>
+void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize,
+                                            const size_t dim, const Blob *p,
+                                            const Blob *t, Blob *loss,
+                                            Context *ctx) {
+  const float *pPtr = static_cast<const float *>(p->data());
+  const int *tPtr = static_cast<const int *>(t->data());
+  float *lossPtr = static_cast<float *>(loss->mutable_data());
+  cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream);
+}
+
+template <>
 void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
                             Blob *out, Context *ctx) {
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -82,19 +93,13 @@ void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
   cuda::Div(num, alpha, inPtr, outPtr, ctx->stream);
 }
 
-template <>
-void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
-                            Context *ctx) {
-  float *outPtr = static_cast<float *>(out->mutable_data());
-  cuda::Set(num, x, outPtr, ctx->stream);
-}
 // NOTE: cublas uses column major order.
 // http://peterwittek.com/cublas-matrix-c-style.html
 template <>
 void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
                              const size_t ncol, const Blob *M, const Blob *v,
                              Blob *out, Context *ctx) {
-  auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+  auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   const float *MPtr = static_cast<const float *>(M->data());
   const float *vPtr = static_cast<const float *>(v->data());
   float *outPtr = static_cast<float *>(out->mutable_data());
@@ -121,7 +126,7 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
   const float *APtr = static_cast<const float *>(A->data());
   const float *BPtr = static_cast<const float *>(B->data());
   float *CPtr = static_cast<float *>(C->mutable_data());
-  auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+  auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha,
                            BPtr, ldb, APtr, lda, &beta, CPtr, ldc));
 }
@@ -155,9 +160,25 @@ void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
   cuda::LT(num, inPtr, x, outPtr, ctx->stream);
 }
 
+template<>
+void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
+                            Context *ctx) {
+  float *outPtr = static_cast<float *>(out->mutable_data());
+  cuda::Set(num, x, outPtr, ctx->stream);
+}
 
-
-
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
+                                               const size_t dim, const Blob *p,
+                                               const Blob *t, Blob *grad,
+                                               Context *ctx) {
+  CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+  const float *pPtr = static_cast<const float *>(p->data());
+  const int *tPtr = static_cast<const int *>(t->data());
+  float *gradPtr = static_cast<float *>(grad->mutable_data());
+  cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr,
+                               ctx->stream);
+}
 
 }  // namespace singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 813ebf0..8af1d76 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -26,10 +26,11 @@ void Softmax::Setup(const LayerConf& conf) {
 
 const Tensor Softmax::Forward(int flag, const Tensor& input) {
   if (input.nDim() == 1) {
-    Tensor tmp = Reshape(input, Shape{1, input.Size()});
-    buf_.push(SoftMax(tmp, 0));
+    buf_.push(SoftMax(input));
   } else {
-    buf_.push(SoftMax(input, axis_));
+    size_t nrow = Product(input.shape(), 0, axis_);
+    const Tensor& tmp = Reshape(input, Shape{nrow, input.Size() / nrow});
+    buf_.push(SoftMax(tmp));
   }
   return buf_.top();
 }
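
The reshape in Forward above folds every dimension before axis_ into rows, so the row-wise SoftMax runs over the trailing dimensions. A standalone sketch of that folding (FoldShape is a hypothetical helper mirroring Product(input.shape(), 0, axis_), not the patch's own code):

#include <cstddef>
#include <utility>
#include <vector>

std::pair<size_t, size_t> FoldShape(const std::vector<size_t> &shape,
                                    size_t axis) {
  size_t size = 1, nrow = 1;
  for (size_t i = 0; i < shape.size(); i++) size *= shape[i];
  for (size_t i = 0; i < axis; i++) nrow *= shape[i];  // Product(shape, 0, axis)
  return {nrow, size / nrow};  // e.g. shape {2, 3, 4}, axis 2 -> {6, 4}
}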

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/cross_entropy.h
----------------------------------------------------------------------
diff --git a/src/model/loss/cross_entropy.h b/src/model/loss/cross_entropy.h
deleted file mode 100644
index 815b795..0000000
--- a/src/model/loss/cross_entropy.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-#define SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-#include <stack>
-#include "singa/model/loss.h"
-
-namespace singa {
-
-/// Cross entropy is for cross entropy loss.
-class CrossEntropy : public Loss<Tensor> {
- public:
-  /// Compute the loss values for each sample/instance given the prediction
-  /// and the target, which is sum {-log(prob_of_truth)}
-  /// Users can call Average(const Tensor&) to get the average
-  /// loss value over all samples in the batch.
-  Tensor Forward(const Tensor& prediction, const Tensor& target) override;
-
-  /// Compute the gradients of the loss values w.r.t. the prediction,
-  /// which is: if the entry x corresponds to ground truth,
-  /// then softmax(x) - 1; else, softmax(x)
-  Tensor Backward() override;
-
- private:
-  // to buffer intermediate data, i.e., softmax(prediction), target
-  std::stack<Tensor> buf_;
-};
-
-Tensor CrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
-  CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
-                      << " The calling pattern is [Forward|Evaluate] Backward";
-
-  size_t batchsize = 1;
-  if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
-  size_t dim = prediction.Size() / batchsize;
-  // a temporal Softmax layer for forward computation
-//  LayerConf conf; // TODO(kaiping): this is currently commented
-//  Softmax softmax_tmp;
-//  softmax_tmp.Setup(conf);
-//  Tensor softmax = softmax_tmp.Forward(0, prediction);
-
-  Tensor softmax(Shape{batchsize, dim});  // TODO(kaiping): Delete
-//  softmax.SetValue<float>(0.5f); // TODO(kaiping): Delete
-
-  softmax.Reshape(Shape{batchsize, dim});
-  // buffer intermediate data
-  buf_.push(softmax);
-  buf_.push(target);
-
-  // Compute loss for each sample
-  Tensor loss(Shape{batchsize, 1});
-  float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
-  float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
-  float * loss_ptr = reinterpret_cast<float*>(loss.blob()->mutable_data());
-  for (size_t i = 0; i < batchsize; i++) {
-    int ilabel = static_cast<int>(truth_ptr[i]);
-    CHECK_GE(ilabel, 0);
-    float prob_of_truth = pre_ptr[ilabel];
-    loss_ptr[i] = -log(prob_of_truth);
-    pre_ptr += dim;  // change to the next sample
-  }
-  return loss;
-}
-
-Tensor CrossEntropy::Backward() {
-  const Tensor& target = buf_.top();
-  buf_.pop();
-  Tensor softmax = buf_.top();
-  buf_.pop();
-
-  size_t batchsize = 1;
-  if (softmax.nDim() > 1)
-    batchsize = softmax.shape().at(0);
-  size_t dim = softmax.Size() / batchsize;
-  float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
-  float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
-  for (size_t i = 0; i < batchsize; i++) {
-    int ilabel = static_cast<int>(truth_ptr[i]);
-    // CHECK_GE(ilabel, 0);
-    pre_ptr[ilabel] -= 1.0;
-    pre_ptr += dim;  // change to the next sample
-  }
-  return softmax;
-}
-}  // namespace singa
-
-#endif  // SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.cc
----------------------------------------------------------------------
diff --git a/src/model/loss/mse.cc b/src/model/loss/mse.cc
new file mode 100644
index 0000000..a4bbb72
--- /dev/null
+++ b/src/model/loss/mse.cc
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/model/loss.h"
+
+namespace singa {
+
+Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) {
+  CHECK(buf_.empty()) << "Do not call Forward twice without calling Backward."
+                      << " The calling pattern is [Forward|Evaluate] Backward";
+  Tensor t = prediction - target;
+  size_t batchsize = 1;
+  if (t.nDim() > 1) batchsize = t.shape().at(0);
+  size_t dim = t.Size() / batchsize;
+  t.Reshape(Shape{batchsize, dim});
+  buf_.push(t);
+  // TODO(wangwei) use CastType for operator/
+  return Sum(Square(t), 1) * 0.5f;
+}
+
+Tensor MSE::Backward() {
+  Tensor ret = buf_.top();
+  buf_.pop();
+  return ret * (1.0f / ret.shape().at(0));
+}
+}  // namespace singa
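
A worked example of the code above (numbers chosen for illustration, not from the patch): a single sample with prediction {1, 3} and target {0, 1} gives t = {1, 2}, so Forward returns 0.5 * (1^2 + 2^2) = 2.5 and Backward returns t / batchsize = {1, 2}.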

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.h
----------------------------------------------------------------------
diff --git a/src/model/loss/mse.h b/src/model/loss/mse.h
deleted file mode 100644
index 1a022f9..0000000
--- a/src/model/loss/mse.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SINGA_MODEL_LOSS_MSE_H_
-#define SINGA_MODEL_LOSS_MSE_H_
-#include <stack>
-#include "singa/model/loss.h"
-
-namespace singa {
-
-/// MSE is for mean squared error or squared euclidean distance.
-class MSE : public Loss<Tensor> {
- public:
-  /// Compute the loss values for each sample/instance given the prediction
-  /// and the target, which is 0.5/||prediction-target||^2
-  /// Users can call Average(const Tensor&) to get the average
-  /// loss value over all samples in the batch.
-  Tensor Forward(const Tensor& prediction, const Tensor& target) override;
-
-  /// Compute the gradients of the loss values w.r.t. the prediction,
-  /// which is (prediction-target)/batchsize
-  Tensor Backward() override;
-
- private:
-  // to buffer intermediate data, i.e., prediction-target
-  std::stack<Tensor> buf_;
-};
-
-Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) {
-  CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
-                      << " The calling pattern is [Forward|Evaluate] Backward";
-  Tensor t = prediction - target;
-  size_t batchsize = 1;
-  if (t.nDim() > 1) batchsize = t.shape().at(0);
-  size_t dim = t.Size() / batchsize;
-  t.Reshape(Shape{batchsize, dim});
-  buf_.push(t);
-  // TODO(wangwei) use CastType for operator/
-  return Sum(Square(t), 1) * 0.5f;
-}
-
-Tensor MSE::Backward() {
-  Tensor ret = buf_.top();
-  buf_.pop();
-  return ret * (1.0f / ret.shape().at(0));
-}
-}  // namespace singa
-
-#endif  // SINGA_MODEL_LOSS_H_
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/softmax_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/src/model/loss/softmax_cross_entropy.cc b/src/model/loss/softmax_cross_entropy.cc
new file mode 100644
index 0000000..4ca323a
--- /dev/null
+++ b/src/model/loss/softmax_cross_entropy.cc
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stack>
+#include "singa/model/loss.h"
+
+namespace singa {
+
+
+Tensor SoftmaxCrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
+  CHECK(buf_.empty()) << "Do not call Forward twice without calling Backward."
+                      << " The calling pattern is [Forward|Evaluate] Backward";
+  size_t batchsize = 1;
+  if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
+  size_t dim = prediction.Size() / batchsize;
+  const Tensor& input = Reshape(prediction, Shape{batchsize, dim});
+  Tensor prob = SoftMax(input);
+
+  // buffer intermediate data
+  buf_.push(prob);
+  buf_.push(target);
+  Tensor loss = prob.Clone();
+
+  ComputeCrossEntropy(target, &loss);
+  return loss;
+}
+
+Tensor SoftmaxCrossEntropy::Backward() {
+  const Tensor target = buf_.top();
+  buf_.pop();
+  Tensor prob = buf_.top();
+  buf_.pop();
+  SoftmaxCrossEntropyBwd(target, &prob);
+  return prob;
+}
+}  // namespace singa
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
index 9bb2321..6b8cb69 100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@ -22,16 +22,15 @@
 #include "gtest/gtest.h"
 #include "singa/core/tensor.h"
 #include "singa/core/device.h"
-#include "../src/model/loss/cross_entropy.h"
+#include "singa/model/loss.h"
+#include "singa_config.h"
 
 using singa::Tensor;
-class TestCrossEntropy : public ::testing::Test {
+class TestSoftmaxCrossEntropy : public ::testing::Test {
  protected:
   virtual void SetUp() {
     p.Reshape(singa::Shape{2, 4});
     t.Reshape(singa::Shape{2, 1});
-    p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float));
-    t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float));
   }
   const float pdat[8] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
   const float tdat[2] = {0.0, 2.0};
@@ -39,8 +38,11 @@ class TestCrossEntropy : public ::testing::Test {
   singa::Tensor p, t;
 };
 
-TEST_F(TestCrossEntropy, CppForward) {
-  singa::CrossEntropy cross_entropy;
+TEST_F(TestSoftmaxCrossEntropy, CppForward) {
+  p.CopyDataFromHostPtr(pdat, 8);
+  t.CopyDataFromHostPtr(tdat, 2);
+
+  singa::SoftmaxCrossEntropy cross_entropy;
   const Tensor& loss = cross_entropy.Forward(p, t);
   auto ldat = loss.data<const float*>();
 
@@ -49,8 +51,11 @@ TEST_F(TestCrossEntropy, CppForward) {
   EXPECT_FLOAT_EQ(ldat[1], result_test);
 }
 
-TEST_F(TestCrossEntropy, CppBackward) {
-  singa::CrossEntropy cross_entropy;
+TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
+  p.CopyDataFromHostPtr(pdat, 8);
+  t.CopyDataFromHostPtr(tdat, 2);
+
+  singa::SoftmaxCrossEntropy cross_entropy;
   cross_entropy.Forward(p, t);
   const Tensor& grad = cross_entropy.Backward();
 
@@ -64,3 +69,46 @@ TEST_F(TestCrossEntropy, CppBackward) {
   EXPECT_FLOAT_EQ(gdat[6], -0.75);
   EXPECT_FLOAT_EQ(gdat[7], 0.25);
 }
+
+#ifdef USE_CUDA
+
+TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
+  singa::SoftmaxCrossEntropy cross_entropy;
+  singa::CudaGPU dev;
+  p.ToDevice(&dev);
+  t.ToDevice(&dev);
+  p.CopyDataFromHostPtr(pdat, 8);
+  t.CopyDataFromHostPtr(tdat, 2);
+
+  Tensor loss = cross_entropy.Forward(p, t);
+  loss.ToHost();
+  auto ldat = loss.data<const float*>();
+
+  const float result_test = -log(0.25);
+  EXPECT_FLOAT_EQ(ldat[0], result_test);
+  EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
+  singa::SoftmaxCrossEntropy cross_entropy;
+  singa::CudaGPU dev;
+  p.ToDevice(&dev);
+  t.ToDevice(&dev);
+  p.CopyDataFromHostPtr(pdat, 8);
+  t.CopyDataFromHostPtr(tdat, 2);
+
+  cross_entropy.Forward(p, t);
+  Tensor grad = cross_entropy.Backward();
+
+  grad.ToHost();
+  auto gdat = grad.data<const float*>();
+  EXPECT_FLOAT_EQ(gdat[0], -0.75);
+  EXPECT_FLOAT_EQ(gdat[1], 0.25);
+  EXPECT_FLOAT_EQ(gdat[2], 0.25);
+  EXPECT_FLOAT_EQ(gdat[3], 0.25);
+  EXPECT_FLOAT_EQ(gdat[4], 0.25);
+  EXPECT_FLOAT_EQ(gdat[5], 0.25);
+  EXPECT_FLOAT_EQ(gdat[6], -0.75);
+  EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
+#endif  // USE_CUDA

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_mse.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc
index 67f583c..a6bd1c3 100644
--- a/test/singa/test_mse.cc
+++ b/test/singa/test_mse.cc
@@ -22,8 +22,9 @@
 #include "gtest/gtest.h"
 #include "singa/core/tensor.h"
 #include "singa/core/device.h"
-#include "../src/model/loss/mse.h"
+#include "singa/model/loss.h"
 #include "singa_config.h"
+
 using singa::Tensor;
 class TestMSE : public ::testing::Test {
  protected:
@@ -66,6 +67,8 @@ TEST_F(TestMSE, CppBackward) {
     EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
 }
 #endif
+
+#ifdef USE_CUDA
 TEST_F(TestMSE, CudaForward) {
   singa::MSE mse;
   singa::CudaGPU dev;
@@ -98,3 +101,4 @@ TEST_F(TestMSE, CudaBackward) {
   for (size_t i = 0; i < grad.Size(); i++)
     EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
 }
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc
index da2a6ef..09dfcd9 100644
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@ -55,7 +55,6 @@ TEST(Softmax, Forward) {
   const float* yptr = out.data<const float*>();
   EXPECT_EQ(n, out.Size());
 
-  float* y = new float[n];
   float* sigma = new float[row];
   for (size_t i = 0; i < row; i++)
     sigma[i] = 0.f;
@@ -63,11 +62,9 @@ TEST(Softmax, Forward) {
     sigma[i / col] += exp(x[i]);
   //EXPECT_EQ(0, sigma[1]);
   for (size_t i = 0; i < row; i++)
-    for (size_t j = 0; j < col; j++)
-      y[i * col + j] = exp(x[i * col + j]) / sigma[i];
-  EXPECT_FLOAT_EQ(y[0], yptr[0]);
-  EXPECT_FLOAT_EQ(y[4], yptr[4]);
-  EXPECT_FLOAT_EQ(y[5], yptr[5]);
+    for (size_t j = 0; j < col; j++) {
+      EXPECT_FLOAT_EQ(yptr[i * col + j], exp(x[i * col + j]) / sigma[i]);
+    }
 }
 
 TEST(Softmax, Backward) {


