singa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wang...@apache.org
Subject [2/3] incubator-singa git commit: fix minor bug
Date Tue, 16 Aug 2016 07:43:06 GMT
fix minor bug

1. in pooling.cc due to the buf_ check, which should be done only for max pooling (average pooling does not push a mask onto buf_)
2. updated tensor.py to avoid errors in to_numpy() caused by
inconsistency between the swig tensor and py tensor members, e.g. device/shape.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/22889bc5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/22889bc5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/22889bc5

Branch: refs/heads/dev
Commit: 22889bc5e5537c2ecc1607d01eb3bdb19fbaa7dc
Parents: 0a76425
Author: Wei Wang <wangwei.cs@gmail.com>
Authored: Tue Aug 16 15:18:07 2016 +0800
Committer: Wei Wang <wangwei@comp.nus.edu.sg>
Committed: Tue Aug 16 15:39:39 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py | 36 +++++++++++++++++++-----------------
 src/model/layer/pooling.cc | 17 +++++++++--------
 src/python/singa/tensor.py | 16 ++++++++++------
 3 files changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 137df80..d28646e 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -98,7 +98,7 @@ def get_lr(epoch):
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
-          num_stacks=1, dropout=0.5, model_path='model.bin'):
+          num_stacks=1, dropout=0.5, model_path='model'):
     # SGD with L2 gradient normalization
     opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
@@ -194,22 +194,24 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
         print 'Epoch %d, evaluation loss is %f' % \
             (epoch, eval_loss / data.num_test_batch / seq_length)
 
-    # checkpoint the file model
-    with open(model_path, 'wb') as fd:
-        print 'saving model to %s' % model_path
-        d = {}
-        for name, w in zip(
-                ['rnn_w', 'dense_w', 'dense_b'],
-                [rnn_w, dense_w, dense_b]):
-            w.to_host()
-            d[name] = tensor.to_numpy(w)
-        d['idx_to_char'] = data.idx_to_char
-        d['char_to_idx'] = data.char_to_idx
-        d['hidden_size'] = hidden_size
-        d['num_stacks'] = num_stacks
-        d['dropout'] = dropout
-
-        pickle.dump(d, fd)
+        if (epoch + 1) % 30 == 0:
+            # checkpoint the file model
+            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
+                print 'saving model to %s' % model_path
+                d = {}
+                for name, w in zip(
+                        ['rnn_w', 'dense_w', 'dense_b'],
+                        [rnn_w, dense_w, dense_b]):
+                    w.to_host()
+                    d[name] = tensor.to_numpy(w)
+                    w.to_device(cuda)
+                d['idx_to_char'] = data.idx_to_char
+                d['char_to_idx'] = data.char_to_idx
+                d['hidden_size'] = hidden_size
+                d['num_stacks'] = num_stacks
+                d['dropout'] = dropout
+
+                pickle.dump(d, fd)
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index a18f9de..1312776 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -107,27 +107,28 @@ const std::pair<Tensor, vector<Tensor>> Pooling::Backward(int
flag,
   CHECK_EQ(grad.device()->lang(), kCpp);
   CHECK_EQ(grad.nDim(), 4u);
   vector<Tensor> param_grad;
-  CHECK(!buf_.empty());
-  Tensor mask = buf_.top();
-  buf_.pop();
-  size_t batchsize = grad.shape(0);
+    size_t batchsize = grad.shape(0);
   Shape shape{batchsize, channels_, height_, width_};
   auto dev = grad.device();
   DataType dtype = grad.data_type();
   Tensor dx(shape, dev, dtype);
   auto gradptr = grad.data<float>();
-  auto maskptr = mask.data<float>();
   float* dxptr = new float[dx.Size()];
-  if (pool_ == PoolingConf_PoolMethod_MAX)
+  if (pool_ == PoolingConf_PoolMethod_MAX) {
+    CHECK(!buf_.empty());
+    Tensor mask = buf_.top();
+    buf_.pop();
+    auto maskptr = mask.data<float>();
     BackwardMaxPooling(gradptr, maskptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else if (pool_ == PoolingConf_PoolMethod_AVE)
+  } else if (pool_ == PoolingConf_PoolMethod_AVE) {
     BackwardAvgPooling(gradptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else
+  } else {
     LOG(FATAL) << "Unknow pooling method";
+  }
 
   dx.CopyDataFromHostPtr(dxptr, dx.Size());
   delete[] dxptr;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index a1e948d..f6bca43 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -79,15 +79,14 @@ class Tensor(object):
             return
         else:
             assert isinstance(shape, tuple), 'shape should be tuple'
-            vs = list(shape)
             if device is None:
                 device = pydevice.get_default_device()
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
             else:
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
-            self.shape = shape
-            self.device = device
-            self.dtype = dtype
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
+        self.shape = shape
+        self.dtype = dtype
+        self.device = device
 
     def ndim(self):
         '''
@@ -136,6 +135,9 @@ class Tensor(object):
             t (Tensor)
         '''
         self.singa_tensor.ResetLike(t.singa_tensor)
+        self.shape = t.shape
+        self.device = t.device
+        self.dtype = t.dtype
 
     '''
     def as_type(self, dtype):
@@ -153,11 +155,13 @@ class Tensor(object):
             device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
         '''
         self.singa_tensor.ToDevice(device)
+        self.device = device
 
     def to_host(self):
         '''Move the tensor data onto the default host CppCPU device.
         '''
         self.singa_tensor.ToHost()
+        self.device = pydevice.default_device
 
     def l2(self):
         '''


Mime
View raw message