singa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kaip...@apache.org
Subject [1/8] incubator-singa git commit: SINGA-305 - Add jupyter notebooks for SINGA V1 tutorial
Date Wed, 15 Mar 2017 09:16:27 GMT
Repository: incubator-singa
Updated Branches:
  refs/heads/master a6e7690bd -> 0695daa66


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/requirements.txt
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/requirements.txt b/doc/en/docs/notebook/requirements.txt
new file mode 100644
index 0000000..21e293b
--- /dev/null
+++ b/doc/en/docs/notebook/requirements.txt
@@ -0,0 +1,5 @@
+matplotlib=2.0.0=np112py27_0
+nb_conda_kernels=2.0.0=py27_0
+nb_conda=2.0.0=py27_0
+pillow=4.0.0=py27_1
+tqdm=4.11.2=py27_0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/rnn.ipynb
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/rnn.ipynb b/doc/en/docs/notebook/rnn.ipynb
new file mode 100644
index 0000000..054ac19
--- /dev/null
+++ b/doc/en/docs/notebook/rnn.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# RNN for Character Level Language Modeling"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Dataset pre-processing\n",
+    "\n",
+    "### sample data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import cPickle as pickle\n",
+    "import numpy as np\n",
+    "import argparse\n",
+    "\n",
+    "# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))\n",
+    "from singa import layer\n",
+    "from singa import loss\n",
+    "from singa import device\n",
+    "from singa import tensor\n",
+    "from singa import optimizer\n",
+    "from singa import initializer\n",
+    "from singa.proto import model_pb2\n",
+    "from tqdm import tnrange"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "class Data(object):\n",
+    "\n",
+    "    def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):\n",
+    "        '''Data object for loading a plain text file.\n",
+    "        Args:\n",
+    "            fpath, path to the text file.\n",
+    "            train_ratio, split the text file into train and test sets, where\n",
+    "                train_ratio of the characters are in the train set.\n",
+    "        '''\n",
+    "        self.raw_data = open(fpath, 'r').read()  # read text file\n",
+    "        chars = list(set(self.raw_data))\n",
+    "        self.vocab_size = len(chars)\n",
+    "        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}\n",
+    "        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}\n",
+    "        data = [self.char_to_idx[c] for c in self.raw_data]\n",
+    "        # seq_length + 1 for the data + label\n",
+    "        nsamples = len(data) / (1 + seq_length)\n",
+    "        data = data[0:nsamples * (1 + seq_length)]\n",
+    "        data = np.asarray(data, dtype=np.int32)\n",
+    "        data = np.reshape(data, (-1, seq_length + 1))\n",
+    "        # shuffle all sequences\n",
+    "        np.random.shuffle(data)\n",
+    "        self.train_dat = data[0:int(data.shape[0]*train_ratio)]\n",
+    "        self.num_train_batch = self.train_dat.shape[0] / batch_size\n",
+    "        self.val_dat = data[self.train_dat.shape[0]:]\n",
+    "        self.num_test_batch = self.val_dat.shape[0] / batch_size\n",
+    "        print 'train dat', self.train_dat.shape\n",
+    "        print 'val dat', self.val_dat.shape\n",
+    "        \n",
+    "def numpy2tensors(npx, npy, dev):\n",
+    "    '''batch, seq, dim -- > seq, batch, dim'''\n",
+    "    tmpx = np.swapaxes(npx, 0, 1)\n",
+    "    tmpy = np.swapaxes(npy, 0, 1)\n",
+    "    inputs = []\n",
+    "    labels = []\n",
+    "    for t in range(tmpx.shape[0]):\n",
+    "        x = tensor.from_numpy(tmpx[t])\n",
+    "        y = tensor.from_numpy(tmpy[t])\n",
+    "        x.to_device(dev)\n",
+    "        y.to_device(dev)\n",
+    "        inputs.append(x)\n",
+    "        labels.append(y)\n",
+    "    return inputs, labels\n",
+    "\n",
+    "\n",
+    "def convert(batch, batch_size, seq_length, vocab_size, dev):\n",
+    "    '''convert a batch of data into a sequence of input tensors'''\n",
+    "    y = batch[:, 1:]\n",
+    "    x1 = batch[:, :seq_length]\n",
+    "    x = np.zeros((batch_size, seq_length, vocab_size), dtype=np.float32)\n",
+    "    for b in range(batch_size):\n",
+    "        for t in range(seq_length):\n",
+    "            c = x1[b, t]\n",
+    "            x[b, t, c] = 1\n",
+    "    return numpy2tensors(x, y, dev)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create the network"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def get_lr(epoch):\n",
+    "    return 0.001 / float(1 << (epoch / 50))\n",
+    "\n",
+    "data = Data('static/linux_input.txt')\n",
+    "# SGD with L2 gradient normalization\n",
+    "opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))\n",
+    "cuda = device.create_cuda_gpu()\n",
+    "rnn = layer.LSTM(name='lstm', hidden_size=32, num_stacks=1, dropout=0.5, input_sample_shape=(data.vocab_size,))\n",
+    "rnn.to_device(cuda)\n",
+    "rnn_w = rnn.param_values()[0]\n",
+    "rnn_w.uniform(-0.08, 0.08)  \n",
+    "\n",
+    "dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(32,))\n",
+    "dense.to_device(cuda)\n",
+    "dense_w = dense.param_values()[0]\n",
+    "dense_b = dense.param_values()[1]\n",
+    "print 'dense w ', dense_w.shape\n",
+    "print 'dense b ', dense_b.shape\n",
+    "initializer.uniform(dense_w, dense_w.shape[0], 0)\n",
+    "print 'dense weight l1 = %f' % (dense_w.l1())\n",
+    "dense_b.set_value(0)\n",
+    "print 'dense b l1 = %f' % (dense_b.l1())\n",
+    "\n",
+    "g_dense_w = tensor.Tensor(dense_w.shape, cuda)\n",
+    "g_dense_b = tensor.Tensor(dense_b.shape, cuda)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conduct SGD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "lossfun = loss.SoftmaxCrossEntropy()\n",
+    "train_loss = 0\n",
+    "for epoch in range(3):\n",
+    "    bar = tnrange(data.num_train_batch, desc='Epoch %d' % 0)\n",
+    "    for b in bar:\n",
+    "        batch = data.train_dat[b * batch_size: (b + 1) * batch_size]\n",
+    "        inputs, labels = convert(batch, batch_size, seq_length, data.vocab_size, cuda)\n",
+    "        inputs.append(tensor.Tensor())\n",
+    "        inputs.append(tensor.Tensor())\n",
+    "\n",
+    "        outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]\n",
+    "        grads = []\n",
+    "        batch_loss = 0\n",
+    "        g_dense_w.set_value(0.0)\n",
+    "        g_dense_b.set_value(0.0)\n",
+    "        for output, label in zip(outputs, labels):\n",
+    "            act = dense.forward(model_pb2.kTrain, output)\n",
+    "            lvalue = lossfun.forward(model_pb2.kTrain, act, label)\n",
+    "            batch_loss += lvalue.l1()\n",
+    "            grad = lossfun.backward()\n",
+    "            grad /= batch_size\n",
+    "            grad, gwb = dense.backward(model_pb2.kTrain, grad)\n",
+    "            grads.append(grad)\n",
+    "            g_dense_w += gwb[0]\n",
+    "            g_dense_b += gwb[1]\n",
+    "            # print output.l1(), act.l1()\n",
+    "            bar.set_postfix(train_loss=batch_loss / seq_length)\n",
+    "        train_loss += batch_loss\n",
+    "\n",
+    "        grads.append(tensor.Tensor())\n",
+    "        grads.append(tensor.Tensor())\n",
+    "        g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]\n",
+    "        dense_w, dense_b = dense.param_values()\n",
+    "        opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')\n",
+    "        opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')\n",
+    "        opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')\n",
+    "    print '\nEpoch %d, train loss is %f' % (epoch, train_loss / data.num_train_batch / seq_length)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Checkpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:\n",
+    "    print 'saving model to %s' % model_path\n",
+    "    d = {}\n",
+    "    for name, w in zip(['rnn_w', 'dense_w', 'dense_b'],[rnn_w, dense_w, dense_b]):\n",
+    "        d[name] = tensor.to_numpy(w)\n",
+    "    d['idx_to_char'] = data.idx_to_char\n",
+    "    d['char_to_idx'] = data.char_to_idx\n",
+    "    d['hidden_size'] = hidden_size\n",
+    "    d['num_stacks'] = num_stacks\n",
+    "    d['dropout'] = dropout\n",
+    "    pickle.dump(d, fd)"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda env:conda]",
+   "language": "python",
+   "name": "conda-env-conda-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/bp.PNG
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/static/bp.PNG b/doc/en/docs/notebook/static/bp.PNG
new file mode 100644
index 0000000..ac5db33
Binary files /dev/null and b/doc/en/docs/notebook/static/bp.PNG differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/digit.jpg
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/static/digit.jpg b/doc/en/docs/notebook/static/digit.jpg
new file mode 100644
index 0000000..8350d88
Binary files /dev/null and b/doc/en/docs/notebook/static/digit.jpg differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/models.PNG
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/static/models.PNG b/doc/en/docs/notebook/static/models.PNG
new file mode 100644
index 0000000..9ebcfa2
Binary files /dev/null and b/doc/en/docs/notebook/static/models.PNG differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/sgd.png
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/static/sgd.png b/doc/en/docs/notebook/static/sgd.png
new file mode 100644
index 0000000..9eac916
Binary files /dev/null and b/doc/en/docs/notebook/static/sgd.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/static/singav1-sw.png b/doc/en/docs/notebook/static/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/en/docs/notebook/static/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 7975042..6302b45 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -27,7 +27,13 @@ Example usages::
 
     # create a convolution layer
     conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+
+    # init param values
+    w, b = conv.param_values()
+    w.gaussian(0, 0.01)
+    b.set_value(0)
     conv.to_device(dev)  # move the layer data onto a CudaGPU device
+
     x = tensor.Tensor((3, 32, 32), dev)
     x.uniform(-1, 1)
     y = conv.foward(True, x)
@@ -766,8 +772,8 @@ class Merge(Layer):
         Returns:
             A list of replicated grad, one per source layer
         '''
-        assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'\
-                ' instead of %s' % type(grad).__name__
+        assert isinstance(grad, tensor.Tensor), 'The input must be Tensor' \
+            ' instead of %s' % type(grad).__name__
         return [grad] * self.num_input, []  # * self.num_input
 
 
@@ -902,7 +908,8 @@ class Slice(Layer):
     def get_output_sample_shape(self):
         out = []
         for i in range(len(self.conf.slice_conf.slice_point) + 1):
-            out.append(self.layer.GetOutputSampleShape(i))
+            out.append(self.layer.GetOutputSampleShapeAt(i))
+        return out
 
     def forward(self, flag, x):
         '''Slice the input tensor on the given axis.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 7dee9f5..82361c5 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -60,6 +60,8 @@ from .proto import core_pb2
 from . import singa_wrap as singa
 import device as pydevice
 
+int32 = core_pb2.kInt
+float32 = core_pb2.kFloat32
 
 class Tensor(object):
     '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor


Mime
View raw message