singa-commits mailing list archives

From wan...@apache.org
Subject [1/4] incubator-singa git commit: SINGA-381 - Update the autograd API to yield the gradients
Date Wed, 11 Jul 2018 08:29:33 GMT
Repository: incubator-singa
Updated Branches:
  refs/heads/master e16cea129 -> b30d7ea55


SINGA-381 - Update the autograd API to yield the gradients

yield gradients from backward() in autograd.py; this saves memory by letting the caller apply and release each gradient as soon as it is yielded, instead of accumulating all gradients in a dict
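
As a quick illustration of the new calling pattern (a minimal sketch, assuming a computed `loss` tensor and an `sgd` optimizer already exist, as in examples/autograd/mlp.py below):

    from singa import autograd

    # autograd.backward() is now a generator that yields (parameter, gradient)
    # pairs one at a time; each gradient can be applied and then released
    # immediately instead of being held in a dict for the whole backward pass.
    for p, gp in autograd.backward(loss):
        sgd.apply(0, gp, p, '')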


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/81908a82
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/81908a82
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/81908a82

Branch: refs/heads/master
Commit: 81908a82f4c9ea01b1359ed3d8fb4118a5bfd147
Parents: e16cea1
Author: Wang Wei <wangwei.cs@gmail.com>
Authored: Thu Jul 5 22:09:27 2018 +0800
Committer: wang wei <wangwei@comp.nus.edu.sg>
Committed: Wed Jul 11 15:19:27 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mlp.py       |  8 +++-----
 examples/autograd/mnist_cnn.py |  6 ++----
 python/singa/autograd.py       | 20 +++++++++++++-------
 3 files changed, 18 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/examples/autograd/mlp.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index 0447927..e90ff1d 100755
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -62,7 +62,7 @@ if __name__ == '__main__':
     label = to_categorical(label, 2).astype(np.float32)
     print('train_data_shape:', data.shape)
     print('train_label_shape:', label.shape)
-    
+
     inputs = Tensor(data=data)
     target = Tensor(data=label)
 
@@ -86,10 +86,8 @@ if __name__ == '__main__':
         x = autograd.add_bias(x, b1)
         x = autograd.soft_max(x)
         loss = autograd.cross_entropy(x, target)
-        in_grads = autograd.backward(loss)
-
-        for param in in_grads:
-            sgd.apply(0, in_grads[param], param, '')
+        for p, gp in autograd.backward(loss):
+            sgd.apply(0, gp, p, '')
 
         if (i % 100 == 0):
             print('training loss = ', tensor.to_numpy(loss)[0])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 5b4e608..db21485 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -135,7 +135,5 @@ if __name__ == '__main__':
                 print('accuracy is:', accuracy_rate, 'loss is:',
                       tensor.to_numpy(loss)[0])
 
-            in_grads = autograd.backward(loss)
-
-            for param in in_grads:
-                sgd.apply(0, in_grads[param], param, '')
+            for p, gp in autograd.backward(loss):
+                sgd.apply(0, gp, p, '')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 9fd8b4d..2ba3098 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -538,6 +538,13 @@ def infer_dependency(op):
     return dependency_count
 
 
+def gradients(y, dy=None):
+    grads = {}  # mapping: x->dx if x.stores_grad
+    for p, dp in backward(y, dy):
+        grads[p] = dp
+    return grads
+
+
 def backward(y, dy=None):
     '''
     Run the backward propagation starting at y.
@@ -566,7 +573,7 @@ def backward(y, dy=None):
     # ready is a queue of (operation, dy list)
     ready = deque([(y.creator, (dy,))])
     not_ready = {}  # mapping: op->[dy]
-    gradients = {}  # mapping: x->dx if x.stores_grad
+
     if y.stores_grad:
         gradients[y] = dy
 
@@ -608,7 +615,8 @@ def backward(y, dy=None):
             if y_stores_grad:
                 # store the gradient for final return, e.g. if x is parameter
                 g = not_ready[src_op][y_idx]
-                gradients[y] = Tensor(device=g.device(), data=g)
+                tg = Tensor(device=g.device(), data=g)
+                yield (y, tg)
             dependency[src_op] -= 1
             if src_op.requires_grad is True:
                 if dependency[src_op] == 0:
@@ -616,10 +624,8 @@ def backward(y, dy=None):
                         ready.append((src_op, not_ready[src_op]))
                     del not_ready[src_op]
 
-    return gradients
-
 
-class NewLayer(object):
+class Layer(object):
 
     def __init__(self):
         pass
@@ -631,7 +637,7 @@ class NewLayer(object):
                 var.to_device(x_device)
 
 
-class Linear(NewLayer):
+class Linear(Layer):
 
     def __init__(self, in_features, out_features, bias=True):
         #self.in_features = in_features
@@ -661,7 +667,7 @@ class Linear(NewLayer):
         return y
 
 
-class Conv2D(NewLayer):
+class Conv2D(Layer):
 
     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                  padding=0, dilation=1, groups=1, bias=True, **kwargs):

