singa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wang...@apache.org
Subject [2/3] incubator-singa git commit: SINGA-98 Add Support for AlexNet ImageNet Classification Model
Date Tue, 05 Jan 2016 16:59:15 GMT
SINGA-98 Add Support for AlexNet ImageNet Classification Model

Update job.conf for alexnet: learning rate, layer order, lr_scale/wd_scale; add cudnn.conf.

Fix a bug in image_preprocess.cc which sets the dst pointer incorrectly.
It led to the observation that the loss and accuracy do not improve after a few iterations;
(the loss is about 6.90x, tested for about 10k iterations);
Caffe's performance starts improving after 3000 iterations (the loss stays around 6.90x during the
200-3500 iteration range).
After fixing the bug, training with mini-batch size 128 works, but the loss only starts decreasing
after around 10k steps.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6e815db3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6e815db3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6e815db3

Branch: refs/heads/master
Commit: 6e815db34b2ca8808ef020071d689043c1e7c469
Parents: fe86b02
Author: Wei Wang <wangwei@comp.nus.edu.sg>
Authored: Sat Dec 26 15:52:13 2015 +0800
Committer: Wei Wang <wangwei@comp.nus.edu.sg>
Committed: Wed Jan 6 00:49:19 2016 +0800

----------------------------------------------------------------------
 examples/alexnet/cudnn.conf                    | 434 ++++++++++++++++++++
 examples/alexnet/job.conf                      | 103 +++--
 examples/cifar10/cudnn.conf                    |   5 +-
 include/singa/utils/math_blob.h                |   6 +
 include/singa/utils/math_kernel.h              |   3 +-
 src/neuralnet/connection_layer/slice.cc        |   4 +-
 src/neuralnet/input_layer/image_preprocess.cc  |  26 +-
 src/neuralnet/input_layer/record.cc            |   2 +-
 src/neuralnet/layer.cc                         |   6 +-
 src/neuralnet/neuron_layer/activation.cc       |  12 +-
 src/neuralnet/neuron_layer/cudnn_activation.cc |  19 +-
 src/neuralnet/neuron_layer/cudnn_lrn.cc        |   2 -
 src/neuralnet/neuron_layer/dropout.cc          |   2 +
 src/neuralnet/neuron_layer/lrn.cc              |  10 +-
 src/proto/job.proto                            |   1 -
 src/utils/image_transform.cc                   |   4 +-
 src/utils/updater.cc                           |   9 +-
 src/worker.cc                                  |   2 +-
 18 files changed, 566 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/alexnet/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/cudnn.conf b/examples/alexnet/cudnn.conf
new file mode 100644
index 0000000..eef20f9
--- /dev/null
+++ b/examples/alexnet/cudnn.conf
@@ -0,0 +1,434 @@
+name: "alexnet"
+train_steps: 450000
+#test_steps: 500
+#test_freq: 1000
+disp_freq: 20
+checkpoint_freq: 100000
+checkpoint_after: 100000
+gpu: 2
+#debug: true
+#checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
+train_one_batch {
+  alg: kBP
+}
+updater{
+  type: kSGD
+  weight_decay: 0.0005
+  momentum: 0.9
+  learning_rate {
+    type: kStep
+    base_lr: 0.01
+    step_conf {
+      gamma: 0.1
+      change_freq: 100000
+    }
+  }
+}
+neuralnet {
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path :"/data/dataset/imagenet/train_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 256
+      #random_skip: 1000
+      shape: 3
+      shape: 256
+      shape: 256
+    }
+    include: kTrain
+  }
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path :"/data/dataset/imagenet/val_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 100
+      shape: 3
+      shape: 256
+      shape: 256
+    }
+    include: kTest
+  }
+  layer{
+    name: "image"
+    type: kImagePreprocess
+    rgbimage_conf {
+      cropsize: 227
+      mirror: true
+    }
+#    partition_dim: 0
+    srclayers: "data"
+  }
+  layer{
+    name: "conv1"
+    type: kCudnnConv
+    srclayers: "image"
+    convolution_conf {
+      num_filters: 96
+      kernel: 11
+      stride: 4
+    }
+#    partition_dim: 0
+    param {
+      name: "w1"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b1"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+  }
+  layer {
+    name: "relu1"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv1"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool1"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu1"
+#    partition_dim: 0
+  }
+  layer {
+    name: "norm1"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 5
+      alpha: 0.0001
+      beta: 0.75
+      knorm: 1
+    }
+    srclayers: "pool1"
+#    partition_dim: 0
+  }
+
+  layer{
+    name: "conv2"
+    type: kCudnnConv
+    srclayers: "norm1"
+    convolution_conf {
+      num_filters: 256
+      kernel: 5
+      pad: 2
+    }
+#    partition_dim: 0
+    param {
+      name: "w2"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b2"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu2"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv2"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool2"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu2"
+#    partition_dim: 0
+  }
+
+  layer {
+    name: "norm2"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 5
+      alpha: 0.0001
+      beta: 0.75
+      knorm: 1
+    }
+    srclayers: "pool2"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv3"
+    type: kCudnnConv
+    srclayers: "norm2"
+    convolution_conf {
+      num_filters: 384
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w3"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b3"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+  }
+  layer {
+    name: "relu3"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv3"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv4"
+    type: kCudnnConv
+    srclayers: "relu3"
+    convolution_conf {
+      num_filters: 384
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w4"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b4"
+      lr_scale:2
+      wd_scale:0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu4"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv4"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv5"
+    type: kCudnnConv
+    srclayers: "relu4"
+    convolution_conf {
+      num_filters: 256
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w5"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b5"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu5"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv5"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool5"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu5"
+#    partition_dim: 0
+  }
+  layer {
+    name: "ip6"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 4096
+    }
+    param {
+      name: "w6"
+      init {
+        type: kGaussian
+        std: 0.005
+      }
+    }
+    param {
+      name: "b6"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+    srclayers: "pool5"
+#    partition_dim: 1
+  }
+  layer {
+    name: "relu6"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "ip6"
+#    partition_dim: 1
+  }
+  layer {
+    name: "drop6"
+    type: kDropout
+    srclayers: "relu6"
+#    partition_dim: 1
+  }
+  layer {
+    name: "ip7"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 4096
+    }
+#    partition_dim: 1
+    param {
+      name: "w7"
+      init {
+        type: kGaussian
+        std: 0.005
+      }
+    }
+    param {
+      name: "b7"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+    srclayers: "drop6"
+  }
+  layer {
+    name: "relu7"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "ip7"
+#    partition_dim: 1
+  }
+  layer {
+    name: "drop7"
+    type: kDropout
+    srclayers: "relu7"
+#    partition_dim: 1
+  }
+  layer {
+    name: "ip8"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 1000
+    }
+#    partition_dim: 1
+    param {
+      name: "w8"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b8"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+    srclayers: "drop7"
+  }
+  layer {
+    name: "loss"
+    type: kCudnnSoftmaxLoss
+    softmaxloss_conf {
+      topk:1
+    }
+    srclayers: "ip8"
+    srclayers: "data"
+  }
+}
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
+  workspace: "examples/alexnet"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/alexnet/job.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/job.conf b/examples/alexnet/job.conf
index b8ac6c7..1898a09 100644
--- a/examples/alexnet/job.conf
+++ b/examples/alexnet/job.conf
@@ -1,8 +1,9 @@
 name: "alexnet"
-train_steps: 100000
-test_steps: 10
+train_steps: 100
+test_steps: 0
 test_freq: 300
-disp_freq: 100
+disp_freq: 5
+#debug: true
 #checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
 train_one_batch {
   alg: kBP
@@ -13,7 +14,7 @@ updater{
   momentum: 0.9
   learning_rate {
     type: kFixed
-    base_lr: 0.9
+    base_lr: 0.01
   }
 }
 neuralnet {
@@ -22,10 +23,10 @@ neuralnet {
     type: kRecordInput
     store_conf {
       backend: "kvfile"
-      path :"/data/dataset/train_record.bin"
-      mean_file: "/data/dataset/train_mean.bin"
-      batchsize: 256
-      random_skip: 5000
+      path :"/data/dataset/imagenet/train_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 32
+      #random_skip: 5000
       shape: 3
       shape: 256
       shape: 256
@@ -38,13 +39,13 @@ neuralnet {
     store_conf {
       backend: "kvfile"
       path :"/data/dataset/val_record.bin"
-      mean_file: "/data/dataset/val_mean.bin"
+      mean_file: "/data/dataset/image_mean.bin"
       batchsize: 256
       shape: 3
       shape: 256
       shape: 256
     }
-      include: kTest
+    include: kTest
   }
   layer{
     name: "image"
@@ -75,6 +76,8 @@ neuralnet {
     }
     param {
       name: "b1"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 0
@@ -88,6 +91,17 @@ neuralnet {
 #    partition_dim: 0
   }
   layer {
+    name: "pool1"
+    type: kCPooling
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu1"
+#    partition_dim: 0
+  }
+  layer {
     name: "norm1"
     type: kLRN
     lrn_conf {
@@ -96,24 +110,14 @@ neuralnet {
       beta: 0.75
       knorm: 2
     }
-    srclayers: "relu1"
-#    partition_dim: 0
-  }
-  layer {
-    name: "pool1"
-    type: kCPooling
-    pooling_conf {
-      pool: MAX
-      kernel: 3
-      stride: 2
-    }
-    srclayers: "norm1"
+    srclayers: "pool1"
 #    partition_dim: 0
   }
+
   layer{
     name: "conv2"
     type: kCConvolution
-    srclayers: "pool1"
+    srclayers: "norm1"
     convolution_conf {
       num_filters: 256
       kernel: 5
@@ -129,9 +133,11 @@ neuralnet {
     }
     param {
       name: "b2"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -142,6 +148,18 @@ neuralnet {
 #    partition_dim: 0
   }
   layer {
+    name: "pool2"
+    type: kCPooling
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu2"
+#    partition_dim: 0
+  }
+
+  layer {
     name: "norm2"
     type: kLRN
     lrn_conf {
@@ -150,24 +168,13 @@ neuralnet {
       beta: 0.75
       knorm: 2
     }
-    srclayers: "relu2"
-#    partition_dim: 0
-  }
-  layer {
-    name: "pool2"
-    type: kCPooling
-    pooling_conf {
-      pool: MAX
-      kernel: 3
-      stride: 2
-    }
-    srclayers: "norm2"
+    srclayers: "pool2"
 #    partition_dim: 0
   }
   layer{
     name: "conv3"
     type: kCConvolution
-    srclayers: "pool2"
+    srclayers: "norm2"
     convolution_conf {
       num_filters: 384
       kernel: 3
@@ -183,6 +190,8 @@ neuralnet {
     }
     param {
       name: "b3"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 0
@@ -214,9 +223,11 @@ neuralnet {
     }
     param {
       name: "b4"
+      lr_scale:2
+      wd_scale:0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -245,9 +256,11 @@ neuralnet {
     }
     param {
       name: "b5"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -278,11 +291,13 @@ neuralnet {
       name: "w6"
       init {
         type: kGaussian
-        std: 0.01
+        std: 0.005
       }
     }
     param {
       name: "b6"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 1
@@ -314,11 +329,13 @@ neuralnet {
       name: "w7"
       init {
         type: kGaussian
-        std: 0.01
+        std: 0.005
       }
     }
     param {
       name: "b7"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 1
@@ -354,9 +371,11 @@ neuralnet {
     }
     param {
       name: "b8"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 1
+        value: 0
       }
     }
     srclayers: "drop7"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/cifar10/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/cudnn.conf b/examples/cifar10/cudnn.conf
index 49132d8..136435b 100644
--- a/examples/cifar10/cudnn.conf
+++ b/examples/cifar10/cudnn.conf
@@ -5,7 +5,7 @@ test_freq: 1000
 #validate_steps: 100
 #validate_freq: 300
 disp_freq: 200
-gpu: 0
+gpu: 2
 #checkpoint_path: "examples/cifar10/checkpoint/step1000-worker0"
 train_one_batch {
   alg: kBP
@@ -116,6 +116,7 @@ neuralnet {
     activation_conf {
       type: RELU
     }
+    share_src_blobs: true
     srclayers:"pool1"
   }
   layer {
@@ -161,6 +162,7 @@ neuralnet {
     activation_conf {
       type: RELU
     }
+    share_src_blobs: true
     srclayers:"conv2"
   }
   layer {
@@ -216,6 +218,7 @@ neuralnet {
     activation_conf {
       type: RELU
     }
+    share_src_blobs: true
     srclayers:"conv3"
   }
   layer {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 301b72f..34e67ad 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -306,6 +306,9 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>*
B) {
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
+    gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
+#else
+    LOG(FATAL) << "Not implemented";
 #endif  // USE_GPU
   }
 }
@@ -324,6 +327,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>&
B,
         C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
+    LOG(ERROR) << "Not implemented";
 #endif  // USE_GPU
   }
 }
@@ -670,6 +674,8 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
 #ifdef USE_GPU
     gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
         A->mutable_gpu_data());
+#else
+    LOG(FATAL) << "Not implemented";
 #endif
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/include/singa/utils/math_kernel.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_kernel.h b/include/singa/utils/math_kernel.h
index 8eb7cf5..0239d3d 100644
--- a/include/singa/utils/math_kernel.h
+++ b/include/singa/utils/math_kernel.h
@@ -79,7 +79,8 @@ extern "C" {
 
   void singa_gpu_set_value(float *data, float value, int n);
 
-  void singa_gpu_threshold(const float *src_data, float *des_data, int n);
+  void singa_gpu_threshold(const float *src_data, float *des_data,
+      float alpha, int n);
 };
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/connection_layer/slice.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/slice.cc b/src/neuralnet/connection_layer/slice.cc
index 15fdc48..efa33a4 100644
--- a/src/neuralnet/connection_layer/slice.cc
+++ b/src/neuralnet/connection_layer/slice.cc
@@ -155,11 +155,11 @@ const std::string SliceLayer::ToString(bool debug, int flag) {
   string ret = "";
   if ((flag & kForward) == kForward && data_.count() !=0) {
     for (unsigned k = 0; k < datavec_.size(); k++)
-      ret += StringPrintf("data-%u :%13.9f ", k, Asum(*datavec_.at(k)));
+      ret += StringPrintf("data-%u :%e ", k, Asum(*datavec_.at(k)));
   }
   if ((flag & kBackward) == kBackward && grad_.count() != 0) {
     for (unsigned k = 0; k < gradvec_.size(); k++)
-    ret += StringPrintf("grad-%u:%13.9f ", k, Asum(*gradvec_.at(k)));
+    ret += StringPrintf("grad-%u:%e ", k, Asum(*gradvec_.at(k)));
   }
   return ret;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/input_layer/image_preprocess.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/image_preprocess.cc b/src/neuralnet/input_layer/image_preprocess.cc
index c63c957..6f2e094 100644
--- a/src/neuralnet/input_layer/image_preprocess.cc
+++ b/src/neuralnet/input_layer/image_preprocess.cc
@@ -39,7 +39,7 @@ void ImagePreprocessLayer::Setup(const LayerProto& conf,
   const auto& shape = src.shape();
   CHECK_EQ(shape.size(), 4);
   CHECK_EQ(shape.at(2), shape.at(3));
-  if (cropsize_ != 0 && cropsize_ != shape.at(2)) {
+  if (cropsize_ && (cropsize_ != shape.at(2) || cropsize_ != shape.at(3))) {
     data_.Reshape(vector<int>{shape.at(0), shape.at(1), cropsize_, cropsize_});
   } else {
     data_ = src;
@@ -49,29 +49,29 @@ void ImagePreprocessLayer::Setup(const LayerProto& conf,
 void ImagePreprocessLayer::ComputeFeature(int flag,
     const vector<Layer*>& srclayers) {
   const auto& srcdata = srclayers.at(0)->data(this);
-  int batchsize = srcdata.shape()[0], channel = srcdata.shape()[1];
-  int height = srcdata.shape()[2], width = srcdata.shape()[3];
+  int batchsize = srcdata.shape(0), channel = srcdata.shape(1);
+  int height = srcdata.shape(2), width = srcdata.shape(3);
+  int srcimage_size = channel * height * width;
+  int image_size = channel * data_.shape(2) * data_.shape(3);
+  std::uniform_int_distribution<int> rand1(0, height - cropsize_);
+  std::uniform_int_distribution<int> rand2(0, width - cropsize_);
+  auto generator = Singleton<Context>::Instance()->rand_generator();
+
   const float* srcdptr = srcdata.cpu_data();
   float* dptr = data_.mutable_cpu_data();
-  int srcimage_size = channel * height * width;
-  int image_size = channel * data_.shape()[2] * data_.shape()[3];
-  std::uniform_int_distribution<int> rand1(0, srcdata.shape()[1] - cropsize_);
-  std::uniform_int_distribution<int> rand2(0, srcdata.shape()[2] - cropsize_);
-  auto generator =
-    Singleton<Context>::Instance()->rand_generator(std::this_thread::get_id());
 
   for (int k = 0; k < batchsize; k++) {
     int h_offset = 0, w_offset = 0;
-    if (cropsize_> 0 && ((flag & kTrain) == kTrain)) {
+    if (cropsize_> 0 && (flag & kTrain)) {
       h_offset = rand1(*generator);
       w_offset = rand2(*generator);
     }
     bool do_mirror = mirror_
                     && (rand1(*generator) % 2)
-                    && ((flag & kTrain) == kTrain);
+                    && (flag & kTrain);
     ImageTransform(srcdptr + k * srcimage_size, nullptr, do_mirror, cropsize_,
-        cropsize_, h_offset, w_offset, srcdata.shape()[1], height, width,
-        scale_, dptr + image_size);
+        cropsize_, h_offset, w_offset, channel, height, width,
+        scale_, dptr + k * image_size);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/input_layer/record.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/record.cc b/src/neuralnet/input_layer/record.cc
index 2fb71c6..b14fc80 100644
--- a/src/neuralnet/input_layer/record.cc
+++ b/src/neuralnet/input_layer/record.cc
@@ -32,7 +32,7 @@ void RecordInputLayer::Setup(const LayerProto& conf,
 }
 
 void RecordInputLayer::LoadRecord(const string& backend,
-    const string&path, Blob<float>* to) {
+    const string& path, Blob<float>* to) {
   io::Store* store = io::OpenStore(backend, path, io::kRead);
   string key, val;
   CHECK(store->Read(&key, &val));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index 3698b21..ac673dd 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -48,15 +48,15 @@ const std::string Layer::ToString(bool debug, int flag) {
     return "";
   string ret = "";
   if ((flag & kForward) == kForward && data_.count() !=0) {
-    ret += StringPrintf("data:%13.9f ", Asum(data_));
+    ret += StringPrintf("data:%e ", Asum(data_));
     for (Param* p : GetParams())
       ret += StringPrintf("%s:%13.9f ",
           p->name().c_str(), Asum(p->data()));
   }
   if ((flag & kBackward) == kBackward && grad_.count() != 0) {
-    ret += StringPrintf("grad:%13.9f ", Asum(grad_));
+    ret += StringPrintf("grad:%e ", Asum(grad_));
     for (Param* p : GetParams())
-      ret += StringPrintf("%s:%13.9f ",
+      ret += StringPrintf("%13.9f ",
           p->name().c_str(), Asum(p->grad()));
   }
   return ret;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/activation.cc b/src/neuralnet/neuron_layer/activation.cc
index 492e453..f75961e 100644
--- a/src/neuralnet/neuron_layer/activation.cc
+++ b/src/neuralnet/neuron_layer/activation.cc
@@ -28,7 +28,11 @@ void ActivationLayer::Setup(const LayerProto& conf,
     const vector<Layer*>& srclayers) {
   NeuronLayer::Setup(conf, srclayers);
   data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(*(srclayers[0]->mutable_grad(this)));
+  grad_.ReshapeLike(data_);
+  if (conf.share_src_blobs()) {
+    data_.ShareData(srclayers[0]->mutable_data(this), false);
+    grad_.ShareData(srclayers[0]->mutable_grad(this), false);
+  }
 }
 void
 ActivationLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
@@ -57,7 +61,7 @@ ActivationLayer::ComputeGradient(int flag, const vector<Layer*>&
srclayers) {
   Blob<float> * gsrc = srclayers[0]->mutable_grad(this);
   switch (layer_conf_.activation_conf().type()) {
     case RELU:
-      Map<op::Relu<float>, float>(data_, gsrc);
+      Map<op::ReluGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
     case SIGMOID:
@@ -65,12 +69,12 @@ ActivationLayer::ComputeGradient(int flag, const vector<Layer*>&
srclayers) {
       Mult(*gsrc, grad_, gsrc);
       break;
     case TANH:
-      Map<op::Tanh<float>, float>(data_, gsrc);
+      Map<op::TanhGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
       /*
     case ActivationType_STANH:
-      Map<op::STanh<float>, float>(data_, gsrc);
+      Map<op::STanhGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
       */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_activation.cc b/src/neuralnet/neuron_layer/cudnn_activation.cc
index 5405b53..12b3d48 100644
--- a/src/neuralnet/neuron_layer/cudnn_activation.cc
+++ b/src/neuralnet/neuron_layer/cudnn_activation.cc
@@ -38,25 +38,34 @@ void CudnnActivationLayer::InitCudnn() {
 
   const auto& shape = data_.shape();
   CHECK_GT(shape.size(), 0);
+  // TODO(wangwei) cudnnSetTensorNdDescriptor reports error if nbdim is < 4.
+  const int nbdim = 4;
   // size of each dimension
-  int* sdim = new int[shape.size()];
-  int* stride = new int[shape.size()];
-  stride[shape.size() -1] = 1;
+  int* sdim = new int[nbdim];
+  int* stride = new int[nbdim];
   int i = shape.size() - 1;
   sdim[i] = shape[i];
   stride[i] = 1;
+  // LOG(ERROR) << "layer " << name();
+  // LOG(ERROR) << sdim[i] << " " << stride[i];
   for (--i; i >= 0; i--) {
     sdim[i] = shape[i];
     stride[i] = shape[i + 1] * stride[i + 1];
+    // LOG(ERROR) << sdim[i] << " " << stride[i];
+  }
+  // padding sdim and stride to 4 dimensions
+  for (i = shape.size(); i < nbdim; i++) {
+    sdim[i] = 1;
+    stride[i] = 1;
   }
   CHECK_CUDNN(cudnnSetTensorNdDescriptor(src_desc_,
         CUDNN_DATA_FLOAT,
-        shape.size(),
+        nbdim,
         sdim,
         stride));
   CHECK_CUDNN(cudnnSetTensorNdDescriptor(my_desc_,
         CUDNN_DATA_FLOAT,
-        shape.size(),
+        nbdim,
         sdim,
         stride));
   delete[] sdim;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_lrn.cc b/src/neuralnet/neuron_layer/cudnn_lrn.cc
index 8237b13..fb8e476 100644
--- a/src/neuralnet/neuron_layer/cudnn_lrn.cc
+++ b/src/neuralnet/neuron_layer/cudnn_lrn.cc
@@ -37,7 +37,6 @@ void CudnnLRNLayer::InitCudnn() {
         alpha_,
         beta_,
         knorm_));
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&src_desc_));
   CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
       CUDNN_TENSOR_NCHW,
       CUDNN_DATA_FLOAT,
@@ -45,7 +44,6 @@ void CudnnLRNLayer::InitCudnn() {
       channels_,
       height_,
       width_));
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&my_desc_));
   CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
       CUDNN_TENSOR_NCHW,
       CUDNN_DATA_FLOAT,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dropout.cc b/src/neuralnet/neuron_layer/dropout.cc
index 6e0db76..706b999 100644
--- a/src/neuralnet/neuron_layer/dropout.cc
+++ b/src/neuralnet/neuron_layer/dropout.cc
@@ -48,12 +48,14 @@ void DropoutLayer::ComputeFeature(int flag, const vector<Layer*>&
srclayers) {
   Blob<float> rand(data_.count());
   SampleUniform(0.0f, 1.0f, &rand);
   Map<op::Threshold<float>, float>(pkeep, rand, &mask_);
+  // scale the mask to avoid scaling in ComputeGradient
   Scale(1.0f / pkeep, &mask_);
   Mult(srclayers[0]->data(this), mask_, &data_);
 }
 
 void DropoutLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) 
{
   Mult(grad_, mask_, srclayers[0]->mutable_grad(this));
+  // no need to mult scale as mask is scaled already.
 }
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/lrn.cc b/src/neuralnet/neuron_layer/lrn.cc
index 9a5ba37..ce96d11 100644
--- a/src/neuralnet/neuron_layer/lrn.cc
+++ b/src/neuralnet/neuron_layer/lrn.cc
@@ -64,9 +64,13 @@ void LRNLayer::ComputeGradient(int flag, const vector<Layer*>&
srclayers) {
   auto grad = Tensor4(&grad_);
   auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
 
-  gsrc = grad * expr::F<op::power>(norm, -beta_);
-  gsrc += (- 2.0f * beta_ * salpha) * expr::chpool<red::sum>(
-      grad * src * expr::F<op::power>(norm, -beta_ - 1.0f), lsize_)  * src;
+  gsrc = grad * expr::F<op::power>(norm, -beta_ );
+  Tensor<cpu, 4> tmp(gsrc.shape);
+  AllocSpace(tmp);
+  tmp = gsrc * src / norm;
+  gsrc += ( - 2.0f * beta_ * salpha ) * expr::chpool<red::sum>(tmp, lsize_ )
+    * src;
+  FreeSpace(tmp);
 }
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 22d4bc5..03ee327 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -192,7 +192,6 @@ message LayerProto {
   optional LayerType type = 20 [default = kUserLayer];
   // type of user layer
   optional string user_type = 21;
-
   // overrides the partition dimension for neural net
   optional int32 partition_dim = 60 [default = -1];
   // names of parameters shared from other layers

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/utils/image_transform.cc
----------------------------------------------------------------------
diff --git a/src/utils/image_transform.cc b/src/utils/image_transform.cc
index 36bec39..28d5f4c 100644
--- a/src/utils/image_transform.cc
+++ b/src/utils/image_transform.cc
@@ -26,11 +26,11 @@ void ImageTransform(const float* in, const float* mean, bool mirror, int
h_crop,
     int w_crop, int h_offset, int w_offset, int channel, int height, int width,
     float scale, float* out) {
   if (h_crop == 0) {
-    CHECK_NE(h_offset, 0);
+    CHECK_EQ(h_offset, 0);
     h_crop = height;
   }
   if (w_crop ==0) {
-    CHECK_NE(w_offset, 0);
+    CHECK_EQ(w_offset, 0);
     w_crop = width;
   }
   CHECK_NE(scale, 0);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index ef6d983..bb055c2 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -7,9 +7,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
-* 
+*
 *   http://www.apache.org/licenses/LICENSE-2.0
-* 
+*
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -59,7 +59,10 @@ float FixedStepLRGen::Get(int step) {
 float StepLRGen::Get(int step) {
   // do not cast int to float
   int freq = proto_.step_conf().change_freq();
-  return  proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
+  float lr = proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
+  LOG_IF(ERROR, step % freq == 0) << "Update learning rate to " << lr
+    << " @ step " << step;
+  return lr;
 }
 
 float LinearLRGen::Get(int step) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index c240e84..8495b5c 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -344,7 +344,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
           Collect(step, p);
         }
       }
-      // LOG(ERROR) << layer->name() << " forward";
+      // DLOG(ERROR) << "Forward " << layer->name();
       layer->ComputeFeature(phase | kForward, net->srclayers(layer));
       if (job_conf_.debug() && grp_id_ == 0)
         label[layer->name()] = layer->ToString(true, phase | kForward);


Mime
View raw message