singa-commits mailing list archives

From wang...@apache.org
Subject [04/51] [abbrv] incubator-singa git commit: SINGA-174 Add Batch Normalization layer and Local Response Normalization layer.
Date Wed, 17 Aug 2016 18:02:25 GMT
SINGA-174 Add Batch Normalization layer and Local Response Normalization
    layer.

Revise the cifar10 example to support training the batch-normalized VGG model
on CPU.
Parameters of the Batch Normalization layer are now 1D tensors
in both the GPU and CPU versions.
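
A minimal sketch of running the revised example on CPU, assuming the
create_net and train signatures shown in the diff below; the data-loading
call, learning-rate schedule and hyper-parameter values are illustrative
assumptions, not part of this commit:

    import vgg
    import train as t

    net = vgg.create_net(use_cpu=True)            # batch-normalized VGG on CPU
    data = t.load_dataset('cifar-10-batches-py')  # hypothetical loader name
    t.train(data, net, max_epoch=250,
            get_lr=lambda epoch: 0.01,            # assumed constant schedule
            weight_decay=0.0005,                  # now forwarded to SGD (see train.py hunk)
            batch_size=100)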


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/055ff17b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/055ff17b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/055ff17b

Branch: refs/heads/master
Commit: 055ff17b2a2507ffd411eaf6f281d476152a87dc
Parents: 05720c2
Author: Wang Ji <ijingobravo@gmail.com>
Authored: Thu Aug 11 15:28:38 2016 +0800
Committer: Wang Ji <ijingobravo@gmail.com>
Committed: Thu Aug 11 15:28:38 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/train.py          |  2 +-
 examples/cifar10/vgg.py            | 16 ++++++++--------
 src/model/layer/cudnn_batchnorm.cc | 16 ++++++++--------
 test/singa/test_cudnn_batchnorm.cc | 22 ++++++++--------------
 4 files changed, 25 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 3285651..9d363cf 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -107,7 +107,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
+    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         opt.register(p, specs)
 

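The hunk above stops hardcoding the weight decay: the SGD optimizer now
receives the weight_decay argument passed into train(), so callers actually
control the regularization strength. A minimal sketch of the fixed call,
assuming the optimizer module imported by train.py; the 0.0005 value is an
illustrative assumption:

    from singa import optimizer

    weight_decay = 0.0005                         # caller-supplied value
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    # before this commit the call was SGD(momentum=0.9, weight_decay=0.004),
    # silently ignoring the value the caller passed to train()
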
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 97e690c..cd0f613 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -80,14 +80,14 @@ def create_net(use_cpu=False):
     print 'Start intialization............'
     for (p, name) in zip(net.param_values(), net.param_names()):
         print name, p.shape
-        if len(p.shape) > 1:
-            if 'mean' in name or 'beta' in name:
-                p.set_value(0.0)
-            elif 'var' in name:
-                p.set_value(1.0)
-            elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
-            elif 'conv' in name:
+        if 'mean' in name or 'beta' in name:
+            p.set_value(0.0)
+        elif 'var' in name:
+            p.set_value(1.0)
+        elif 'gamma' in name:
+            initializer.uniform(p, 0, 1)
+        elif len(p.shape) > 1:
+            if 'conv' in name:
                 initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
             else:
                 initializer.gaussian(p, 0, 0.02)

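Because the batch-normalization parameters are now 1D (see the
cudnn_batchnorm.cc hunk below), the old len(p.shape) > 1 guard would have
skipped them entirely. The hunk above lifts the name-based checks out of that
guard: means and betas start at 0, variances at 1, gammas uniform in [0, 1),
and only genuinely multi-dimensional weights fall through to the Gaussian
branches. A hedged sketch of the resulting rule as a standalone helper (the
final else branch for remaining 1-D biases is an assumption; it lies outside
the hunk):

    import math
    from singa import initializer

    def init_param(p, name):
        # batch-norm statistics and shift start at 0, variance at 1
        if 'mean' in name or 'beta' in name:
            p.set_value(0.0)
        elif 'var' in name:
            p.set_value(1.0)
        elif 'gamma' in name:
            initializer.uniform(p, 0, 1)
        # only multi-dimensional weights reach the Gaussian initializers
        elif len(p.shape) > 1:
            if 'conv' in name:
                initializer.gaussian(p, 0, math.sqrt(2.0 / (9.0 * p.shape[0])))
            else:
                initializer.gaussian(p, 0, 0.02)
        else:
            p.set_value(0.0)  # assumed: remaining 1-D biases start at zero
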
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
index 01682b7..f29679c 100644
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -39,14 +39,14 @@ void CudnnBatchNorm::ToDevice(std::shared_ptr<Device> device) {
 
 void CudnnBatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   BatchNorm::Setup(in_sample, conf);
-  bnScale_.Reshape(Shape{1,channels_,1,1});
-  bnBias_.ResetLike(bnScale_);
-  dbnScale_.ResetLike(bnScale_);
-  dbnBias_.ResetLike(bnScale_);
-  runningMean_.ResetLike(bnScale_);
-  runningVariance_.ResetLike(bnScale_);
-  resultSaveMean_.ResetLike(bnScale_);
-  resultSaveVariance_.ResetLike(bnScale_);
+  bnScale_.Reshape(Shape{channels_});
+  bnBias_.Reshape(Shape{channels_});
+  dbnScale_.Reshape(Shape{channels_});
+  dbnBias_.Reshape(Shape{channels_});
+  runningMean_.Reshape(Shape{channels_});
+  runningVariance_.Reshape(Shape{channels_});
+  resultSaveMean_.Reshape(Shape{channels_});
+  resultSaveVariance_.Reshape(Shape{channels_});
 }
 
 void CudnnBatchNorm::InitCudnn(const Shape& shape, DataType dtype) {

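CudnnBatchNorm::Setup now shapes all eight per-channel state tensors as
Shape{channels_} instead of Shape{1, channels_, 1, 1}, which is what the
Python initialization loop above observes through p.shape. A small hedged
check, reusing the create_net call shown earlier; the parameter-name keywords
follow the vgg.py loop:

    import vgg

    net = vgg.create_net(use_cpu=False)
    for (p, name) in zip(net.param_values(), net.param_names()):
        if any(k in name for k in ('mean', 'var', 'gamma', 'beta')):
            # previously (1, C, 1, 1); after this commit the shape is (C,)
            assert len(p.shape) == 1
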
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/055ff17b/test/singa/test_cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_batchnorm.cc b/test/singa/test_cudnn_batchnorm.cc
index b2746dc..b024c19 100644
--- a/test/singa/test_cudnn_batchnorm.cc
+++ b/test/singa/test_cudnn_batchnorm.cc
@@ -152,19 +152,19 @@ TEST(CudnnBatchNorm, Backward) {
   singa::Tensor dy_tensor(singa::Shape{1,2,4,4}, cuda);
   dy_tensor.CopyDataFromHostPtr(dy, 1*2*4*4);
   const float alpha_[] = {1, 1};
-  singa::Tensor alpha(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor alpha(singa::Shape{2}, cuda);
   alpha.CopyDataFromHostPtr(alpha_, 1*2*1*1);
 
   const float beta_[] = {0, 0};
-  singa::Tensor beta(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor beta(singa::Shape{2}, cuda);
   beta.CopyDataFromHostPtr(beta_, 1*2*1*1);
 
   const float mean_[] = {0.0123405, -0.0622333};
-  singa::Tensor mean(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor mean(singa::Shape{2}, cuda);
   mean.CopyDataFromHostPtr(mean_, 1*2*1*1);
 
   const float var_[] = {15.9948, 8.68198};
-  singa::Tensor var(singa::Shape{1,2,1,1}, cuda);
+  singa::Tensor var(singa::Shape{2}, cuda);
   var.CopyDataFromHostPtr(var_, 1*2*1*1);
 
   batchnorm.ToDevice(cuda);
@@ -220,11 +220,8 @@ TEST(CudnnBatchNorm, Backward) {
   dbnScale.ToHost();
   const float *dbnScaleptr = dbnScale.data<float>();
   const auto & dbnScaleShape = dbnScale.shape();
-  EXPECT_EQ(4u, dbnScaleShape.size());
-  EXPECT_EQ(1u, dbnScaleShape[0]);
-  EXPECT_EQ(2u, dbnScaleShape[1]);
-  EXPECT_EQ(1u, dbnScaleShape[2]);
-  EXPECT_EQ(1u, dbnScaleShape[3]);
+  EXPECT_EQ(1u, dbnScaleShape.size());
+  EXPECT_EQ(2u, dbnScaleShape[0]);
 
   EXPECT_NEAR(-0.013569f, dbnScaleptr[0], 1e-4f);
   EXPECT_NEAR(-0.00219431f, dbnScaleptr[1], 1e-4f);
@@ -233,11 +230,8 @@ TEST(CudnnBatchNorm, Backward) {
   dbnBias.ToHost();
   const float *dbnBiasptr = dbnBias.data<float>();
   const auto & dbnBiasShape = dbnBias.shape();
-  EXPECT_EQ(4u, dbnBiasShape.size());
-  EXPECT_EQ(1u, dbnBiasShape[0]);
-  EXPECT_EQ(2u, dbnBiasShape[1]);
-  EXPECT_EQ(1u, dbnBiasShape[2]);
-  EXPECT_EQ(1u, dbnBiasShape[3]);
+  EXPECT_EQ(1u, dbnBiasShape.size());
+  EXPECT_EQ(2u, dbnBiasShape[0]);
 
   EXPECT_NEAR(-0.0322803f, dbnBiasptr[0], 1e-4f);
   EXPECT_NEAR(0.0161278f, dbnBiasptr[1], 1e-4f);

