From: wangwei@apache.org
To: commits@singa.incubator.apache.org
Date: Tue, 09 Aug 2016 16:02:55 -0000
Message-Id: <4d1533c7b051484e8f1d3d9cc370ac19@git.apache.org>
In-Reply-To: <9b4ebe8da9804176aafbd7b351290cfd@git.apache.org>
References: <9b4ebe8da9804176aafbd7b351290cfd@git.apache.org>
Subject: [2/2] incubator-singa git commit: SINGA-231 Batchnormalized VGG model for cifar-10

SINGA-231 Batchnormalized VGG model for cifar-10

Merge the training of vgg and alexnet into train.py.
The validation accuracy of vgg could reach 0.89.
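The merge works because everything model-specific (the network builder, learning-rate schedule, weight decay, and pre-processing) is passed into one shared train() function; the largest data-side difference is pre-processing. A numpy-only sketch of the two schemes, mirroring normalize_for_alexnet and normalize_for_vgg from the train.py diff below; the random arrays are placeholders for the real CIFAR-10 batches:

    import numpy as np

    # placeholders for the arrays returned by load_train_data/load_test_data
    train_x = np.random.rand(100, 3, 32, 32).astype(np.float32)
    test_x = np.random.rand(20, 3, 32, 32).astype(np.float32)

    # alexnet: subtract the per-pixel mean image of the training set
    mean_image = np.average(train_x, axis=0)  # shape (3, 32, 32)
    alexnet_train, alexnet_test = train_x - mean_image, test_x - mean_image

    # vgg: standardize with the scalar mean and std of the training set
    mean, std = train_x.mean(), train_x.std()
    vgg_train, vgg_test = (train_x - mean) / std, (test_x - mean) / std

    print alexnet_train.mean(), vgg_train.mean(), vgg_train.std()  # ~0, ~0, ~1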
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/28678ae8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/28678ae8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/28678ae8

Branch: refs/heads/dev
Commit: 28678ae8329112ca1f11086b52ded7149ec9ab2c
Parents: bc3b74b
Author: Wei Wang
Authored: Tue Aug 9 20:06:29 2016 +0800
Committer: Wei Wang
Committed: Wed Aug 10 00:01:03 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.py           |  16 ++-
 examples/cifar10/predict.py           |  14 ++-
 examples/cifar10/run-parallel.sh      |   1 +
 examples/cifar10/train.py             |  63 +++++++----
 examples/cifar10/train_vgg_cifar10.py | 162 -----------------------------
 examples/cifar10/vgg-parallel.cc      |  24 ++---
 examples/cifar10/vgg.py               |  66 ++++++++++--
 7 files changed, 138 insertions(+), 208 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 4b3daec..96c339a 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -14,15 +14,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
+''' This model is created following the structure from
+https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg
+Following the same setting for hyper-parameters and data pre-processing, the final
+validation accuracy would be about 82%.
+'''
+
 import sys
 import os
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
+from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
-from singa.proto import core_pb2
 
 
 def create_net():
@@ -44,4 +50,12 @@ def create_net():
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
     net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+
+    for (p, specs) in zip(net.param_values(), net.param_specs()):
+        filler = specs.filler
+        if filler.type == 'gaussian':
+            initializer.gaussian(p, filler.mean, filler.std)
+        else:
+            p.set_value(0)
+        print specs.name, filler.type, p.l1()
+
     return net
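With this change, parameter initialization moves out of train.py (see the train.py diff below) and into create_net(), driven by each parameter's filler spec. A numpy-only sketch of what the loop above does; FakeSpec is a hypothetical stand-in for a SINGA ParamSpec with a filler, not real API:

    import numpy as np

    class FakeSpec(object):
        # hypothetical stand-in for a SINGA ParamSpec carrying a filler
        def __init__(self, name, type, mean=0.0, std=1.0):
            self.name, self.type, self.mean, self.std = name, type, mean, std

    def init_param(shape, filler):
        if filler.type == 'gaussian':
            return np.random.normal(filler.mean, filler.std, shape)
        return np.zeros(shape)  # anything else (e.g. biases) is zero-filled

    w = init_param((10, 1024), FakeSpec('dense/weight', 'gaussian', 0, 0.02))
    b = init_param((10,), FakeSpec('dense/bias', 'constant'))
    print 'dense/weight', np.abs(w).sum() / w.size  # rough analogue of p.l1()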
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index d083d0b..07b1145 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-
+import cPickle as pickle
 import numpy as np
 import sys
 import os
@@ -27,6 +27,15 @@ import net as ffnet
 
 
 def predict(net, images, cuda, topk=5):
+    '''Predict the label of each image.
+
+    Args:
+        net, a pretrained neural net
+        images, a batch of images [batch_size, 3, 32, 32], which have been
+            pre-processed
+        cuda, the cuda device
+        topk, return the topk labels for each image.
+    '''
     x = tensor.from_numpy(images.astype(np.float32))
     x.to_device(cuda)
     y = net.predict(x)
@@ -40,7 +49,7 @@ def predict(net, images, cuda, topk=5):
 def load_dataset(filepath):
     print 'Loading data file %s' % filepath
     with open(filepath, 'rb') as fd:
-        cifar10 = cPickle.load(fd)
+        cifar10 = pickle.load(fd)
     image = cifar10['data'].astype(dtype=np.uint8)
     image = image.reshape((-1, 3, 32, 32))
     label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -79,4 +88,5 @@ if __name__ == '__main__':
 
     mean = compute_image_mean('cifar-10-batches-py')
     test_images, _ = load_test_data('cifar-10-batches-py')
+    # minus mean is for alexnet; vgg uses a different pre-processing strategy
    print predict(model, test_images - mean, cuda)
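The topk selection in predict() boils down to sorting each image's class probabilities and keeping the best k indices. A minimal numpy sketch of that step, with random placeholder probabilities standing in for the network output:

    import numpy as np

    def topk_labels(prob, k=5):
        # indices of the k largest entries in each row, best first
        return np.argsort(-prob, axis=1)[:, :k]

    prob = np.random.rand(4, 10).astype(np.float32)
    prob /= prob.sum(axis=1, keepdims=True)  # rows sum to 1, like softmax
    print topk_labels(prob, 3)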
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index 6a9109a..18193db 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,2 +1,3 @@
 #!/usr/bin/env sh
 ../../build/bin/alexnet-parallel -epoch 4
+#../../build/bin/vgg-parallel -epoch 4

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index f4caca4..cb4110d 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -23,9 +23,9 @@
 import cPickle
 import numpy as np
 import os
 import sys
+import argparse
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
 from singa import utils
 from singa import optimizer
 from singa import device
@@ -33,6 +33,7 @@ from singa import tensor
 from singa.proto import core_pb2
 
 import alexnet
+import vgg
 
 
 def load_dataset(filepath):
@@ -65,7 +66,28 @@ def load_test_data(dir_path):
     return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
 
 
-def get_lr(epoch):
+def normalize_for_vgg(train_x, test_x):
+    mean = train_x.mean()
+    std = train_x.std()
+    train_x -= mean
+    test_x -= mean
+    train_x /= std
+    test_x /= std
+    return train_x, test_x
+
+
+def normalize_for_alexnet(train_x, test_x):
+    mean = np.average(train_x, axis=0)
+    train_x -= mean
+    test_x -= mean
+    return train_x, test_x
+
+
+def vgg_lr(epoch):
+    return 0.01 / float(1 << ((epoch / 30)))
+
+
+def alexnet_lr(epoch):
     if epoch < 120:
         return 0.001
     elif epoch < 130:
@@ -74,32 +96,21 @@
         return 0.0001
     else:
         return 0.00001
 
 
-def train(data_dir, net, num_epoch=140, batch_size=100):
+def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
     print 'Start intialization............'
     cuda = device.create_cuda_gpu()
     net.to_device(cuda)
     opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
     for (p, specs) in zip(net.param_values(), net.param_specs()):
-        filler = specs.filler
-        if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
-        else:
-            p.set_value(0)
         opt.register(p, specs)
-        print specs.name, filler.type, p.l1()
 
-    print 'Loading data ..................'
-    train_x, train_y = load_train_data(data_dir)
-    test_x, test_y = load_test_data(data_dir)
-    mean = np.average(train_x, axis=0)
-    train_x -= mean
-    test_x -= mean
     tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
     ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+    train_x, train_y, test_x, test_y = data
     num_train_batch = train_x.shape[0] / batch_size
     num_test_batch = test_x.shape[0] / batch_size
     idx = np.arange(train_x.shape[0], dtype=np.int32)
-    for epoch in range(num_epoch):
+    for epoch in range(max_epoch):
         np.random.shuffle(idx)
         loss, acc = 0.0, 0.0
         print 'Epoch %d' % epoch
@@ -135,8 +146,20 @@
     net.save('model.bin')  # save model params into checkpoint file
 
 if __name__ == '__main__':
-    data_dir = 'cifar-10-batches-py'
-    assert os.path.exists(data_dir), \
+    parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+    parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
+    parser.add_argument('data', default='cifar-10-batches-py')
+    args = parser.parse_args()
+    assert os.path.exists(args.data), \
         'Pls download the cifar10 dataset via "download_data.py py"'
-    net = alexnet.create_net()
-    train(data_dir, net)
+    print 'Loading data ..................'
+    train_x, train_y = load_train_data(args.data)
+    test_x, test_y = load_test_data(args.data)
+    if args.model == 'alexnet':
+        train_x, test_x = normalize_for_alexnet(train_x, test_x)
+        net = alexnet.create_net()
+        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004)
+    else:
+        train_x, test_x = normalize_for_vgg(train_x, test_x)
+        net = vgg.create_net()
+        train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005)
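The two learning-rate schedules behave quite differently: vgg_lr halves 0.01 every 30 epochs (Python 2 integer division feeds the bit shift), while alexnet_lr holds 0.001 and drops twice near the end of training. Evaluating the functions from the diff at a few epochs:

    def vgg_lr(epoch):
        return 0.01 / float(1 << (epoch / 30))

    def alexnet_lr(epoch):
        if epoch < 120:
            return 0.001
        elif epoch < 130:
            return 0.0001
        else:
            return 0.00001

    for epoch in (0, 29, 30, 60, 120, 240):
        print epoch, vgg_lr(epoch), alexnet_lr(epoch)
    # vgg_lr:     0.01, 0.01, 0.005, 0.0025, 0.000625, 3.90625e-05
    # alexnet_lr: 0.001, 0.001, 0.001, 0.001, 0.0001, 1e-05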
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train_vgg_cifar10.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train_vgg_cifar10.py b/examples/cifar10/train_vgg_cifar10.py
deleted file mode 100644
index e9df04e..0000000
--- a/examples/cifar10/train_vgg_cifar10.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
-It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
-includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
-"""
-
-import cPickle
-import numpy as np
-import os
-import sys
-import math
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
-from singa import utils
-from singa import optimizer
-from singa import device
-from singa import tensor
-from singa.proto import core_pb2
-
-import vgg
-
-
-def load_dataset(filepath):
-    print 'Loading data file %s' % filepath
-    with open(filepath, 'rb') as fd:
-        cifar10 = cPickle.load(fd)
-    image = cifar10['data'].astype(dtype=np.uint8)
-    image = image.reshape((-1, 3, 32, 32))
-    label = np.asarray(cifar10['labels'], dtype=np.uint8)
-    label = label.reshape(label.size, 1)
-    return image, label
-
-
-def load_train_data(dir_path, num_batches=5):
-    labels = []
-    batchsize = 10000
-    images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
-    for did in range(1, num_batches + 1):
-        fname_train_data = dir_path + "/data_batch_{}".format(did)
-        image, label = load_dataset(fname_train_data)
-        images[(did - 1) * batchsize:did * batchsize] = image
-        labels.extend(label)
-    images = np.array(images, dtype=np.float32)
-    labels = np.array(labels, dtype=np.int32)
-    return images, labels
-
-
-def load_test_data(dir_path):
-    images, labels = load_dataset(dir_path + "/test_batch")
-    return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def get_lr(epoch):
-    return 0.01 / float(1 << ((epoch / 30)))
-    #if epoch < 100:
-    #    return 0.01
-    #elif epoch < 150:
-    #    return 0.005
-    #elif epoch < 200:
-    #    return 0.001
-    #elif epoch < 250:
-    #    return 0.0001
-
-
-def train(data_dir, net, num_epoch=250, batch_size=128):
-    print 'Creating Device............'
-    cuda = device.create_cuda_gpus(2)[1]
-    net.to_device(cuda)
-    print 'Start intialization............'
-    opt = optimizer.SGD(momentum=0.9, weight_decay=0.0005)
-    for (p, name) in zip(net.param_values(), net.param_names()):
-        print name, p.shape
-        if len(p.shape) > 1:
-            if 'mean' in name or 'beta' in name:
-                p.set_value(0.0)
-            elif 'var' in name:
-                p.set_value(1.0)
-            elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
-            elif 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
-            else:
-                initializer.gaussian(p, 0, 0.02)
-
-                #stdv = 1.0/math.sqrt(p.shape[1])
-                #initializer.uniform(p, -stdv, stdv)
-        else:
-            p.set_value(0)
-        #print specs.name, filler.type, p.l1()
-        print name, p.l1()
-    print 'Loading data ..................'
-    train_x, train_y = load_train_data(data_dir)
-    test_x, test_y = load_test_data(data_dir)
-    mean = train_x.mean()
-    std = train_x.std()
-    train_x -= mean
-    test_x -= mean
-    train_x /= std
-    test_x /= std
-
-    tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
-    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
-    num_train_batch = train_x.shape[0] / batch_size
-    num_test_batch = test_x.shape[0] / batch_size
-    idx = np.arange(train_x.shape[0], dtype=np.int32)
-    for epoch in range(num_epoch):
-        np.random.shuffle(idx)
-        loss, acc = 0.0, 0.0
-        print 'Epoch %d' % epoch
-        for b in range(num_train_batch):
-            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
-            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
-            tx.copy_from_numpy(x)
-            ty.copy_from_numpy(y)
-            grads, (l, a) = net.train(tx, ty)
-            loss += l
-            acc += a
-            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
-                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
-            # update progress bar
-            utils.update_progress(b * 1.0 / num_train_batch,
-                                  'training loss = %f, accuracy = %f' % (l, a))
-        info = '\ntraining loss = %f, training accuracy = %f' \
-            % (loss / num_train_batch, acc / num_train_batch)
-        print info
-
-        loss, acc = 0.0, 0.0
-        for b in range(num_test_batch):
-            x = test_x[b * batch_size: (b + 1) * batch_size]
-            y = test_y[b * batch_size: (b + 1) * batch_size]
-            tx.copy_from_numpy(x)
-            ty.copy_from_numpy(y)
-            l, a = net.evaluate(tx, ty)
-            loss += l
-            acc += a
-
-        print 'test loss = %f, test accuracy = %f' \
-            % (loss / num_test_batch, acc / num_test_batch)
-    net.save('model.bin')  # save model params into checkpoint file
-
-if __name__ == '__main__':
-    data_dir = 'cifar-10-batches-py'
-    assert os.path.exists(data_dir), \
-        'Pls download the cifar10 dataset via "download_data.py py"'
-    net = vgg.create_net()
-    train(data_dir, net)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index ba308e9..c6b7fa1 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -32,7 +32,7 @@
 #include "../../src/model/layer/cudnn_activation.h"
 #include "../../src/model/layer/cudnn_pooling.h"
 #include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/cudnn_dropout.h"
+#include "../../src/model/layer/dropout.h"
 #include "../../src/model/layer/cudnn_batchnorm.h"
 #include "../../src/model/layer/dense.h"
 #include "../../src/model/layer/flatten.h"
@@ -155,7 +155,7 @@ LayerConf GenBatchNormConf(string name) {
 LayerConf GenDropoutConf(string name, float dropout_ratio) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnDropout");
+  conf.set_type("Dropout");
   DropoutConf *dropout = conf.mutable_dropout_conf();
   dropout->set_dropout_ratio(dropout_ratio);
@@ -172,37 +172,37 @@ FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 32, 32};
 
   ConvBNReLU(net, "conv1_1", 64, &s);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop1", 0.3));
+  net.Add(new Dropout(), GenDropoutConf("drop1", 0.3));
   ConvBNReLU(net, "conv1_2", 64);
   net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
   ConvBNReLU(net, "conv2_1", 128);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop2", 0.4));
   ConvBNReLU(net, "conv2_2", 128);
   net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
   ConvBNReLU(net, "conv3_1", 256);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop3_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop3_1", 0.4));
   ConvBNReLU(net, "conv3_2", 256);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop3_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop3_2", 0.4));
   ConvBNReLU(net, "conv3_3", 256);
   net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
   ConvBNReLU(net, "conv4_1", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop4_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop4_1", 0.4));
   ConvBNReLU(net, "conv4_2", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop4_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop4_2", 0.4));
   ConvBNReLU(net, "conv4_3", 512);
   net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
   ConvBNReLU(net, "conv5_1", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop5_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop5_1", 0.4));
   ConvBNReLU(net, "conv5_2", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop5_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop5_2", 0.4));
   ConvBNReLU(net, "conv5_3", 512);
   net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
 
   net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new CudnnDropout(), GenDropoutConf("flat_drop", 0.5));
+  net.Add(new Dropout(), GenDropoutConf("flat_drop", 0.5));
   net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
   net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
   net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
-  net.Add(new CudnnDropout(), GenDropoutConf("ip1_drop", 0.5));
+  net.Add(new Dropout(), GenDropoutConf("ip1_drop", 0.5));
   net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
 
   return net;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 8063307..0b9bb56 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -1,12 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" The VGG model is adapted from http://torch.ch/blog/2015/07/30/cifar.html.
+The best validation accuracy we achieved is about 89% without data augmentation.
+The performance could be improved by tuning some hyper-parameters, including
+learning rate, weight decay, max_epoch, parameter initialization, etc.
+"""
+
 import sys
 import os
+import math
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+
 from singa import layer
+from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
-from singa.proto import core_pb2
+
 
 def ConvBnReLU(net, name, nb_filers, sample_shape=None):
     net.add(layer.Conv2D(name + '_1', nb_filers, 3, 1, pad=1,
@@ -14,39 +39,58 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
     net.add(layer.BatchNormalization(name + '_2'))
     net.add(layer.Activation(name + '_3'))
 
+
 def create_net():
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
-    net.add(layer.Dropout('drop1', 0.3, engine='cudnn'))
+    net.add(layer.Dropout('drop1', 0.3, engine='cuda'))
     ConvBnReLU(net, 'conv1_2', 64)
     net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv2_1', 128)
-    net.add(layer.Dropout('drop2_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop2_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv2_2', 128)
     net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv3_1', 256)
-    net.add(layer.Dropout('drop3_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop3_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv3_2', 256)
-    net.add(layer.Dropout('drop3_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop3_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv3_3', 256)
     net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv4_1', 512)
-    net.add(layer.Dropout('drop4_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop4_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv4_2', 512)
-    net.add(layer.Dropout('drop4_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop4_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv4_3', 512)
     net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv5_1', 512)
-    net.add(layer.Dropout('drop5_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop5_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv5_2', 512)
-    net.add(layer.Dropout('drop5_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop5_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv5_3', 512)
     net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dropout('drop_flat', 0.5, engine='cudnn'))
+    net.add(layer.Dropout('drop_flat', 0.5, engine='cuda'))
     net.add(layer.Dense('ip1', 512))
     net.add(layer.BatchNormalization('batchnorm_ip1'))
     net.add(layer.Activation('relu_ip1'))
-    net.add(layer.Dropout('drop_ip2', 0.5, engine='cudnn'))
+    net.add(layer.Dropout('drop_ip2', 0.5, engine='cuda'))
     net.add(layer.Dense('ip2', 10))
+    print 'Start intialization............'
+    for (p, name) in zip(net.param_values(), net.param_names()):
+        print name, p.shape
+        if len(p.shape) > 1:
+            if 'mean' in name or 'beta' in name:
+                p.set_value(0.0)
+            elif 'var' in name:
+                p.set_value(1.0)
+            elif 'gamma' in name:
+                initializer.uniform(p, 0, 1)
+            elif 'conv' in name:
+                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+            else:
+                initializer.gaussian(p, 0, 0.02)
+        else:
+            p.set_value(0)
+        print name, p.l1()
+    return net