From: wangwei@apache.org
To: commits@singa.incubator.apache.org
Date: Fri, 24 Jun 2016 06:51:37 -0000
Subject: [4/6] incubator-singa git commit: changed all device pointer to shared pointer

changed all device pointer to shared pointer

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5651383f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5651383f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5651383f
Branch: refs/heads/dev
Commit: 5651383f5dbe0ab17eeda70f491d837a24bcb4ab
Parents: 077d13e
Author: liyuchenmike@gmail.com
Authored: Wed Jun 22 21:06:38 2016 +0800
Committer: liyuchenmike@gmail.com 
Committed: Wed Jun 22 21:06:38 2016 +0800 ---------------------------------------------------------------------- include/singa/core/device.h | 7 ++-- include/singa/core/tensor.h | 10 ++--- include/singa/model/layer.h | 2 +- src/core/device/cpp_cpu.cc | 2 +- src/core/device/cuda_gpu.cc | 5 --- src/core/device/device.cc | 2 +- src/core/memory/memory.cc | 3 -- src/core/tensor/tensor.cc | 19 +++++----- src/model/layer/batchnorm.cc | 2 +- src/model/layer/batchnorm.h | 2 +- src/model/layer/dense.cc | 2 +- src/model/layer/dense.h | 2 +- src/model/layer/dropout.cc | 2 +- src/model/layer/dropout.h | 2 +- test/singa/test_dense.cc | 33 +++++++---------- test/singa/test_memory.cc | 6 +-- test/singa/test_mse.cc | 17 ++++----- test/singa/test_sgd.cc | 8 ++-- test/singa/test_tensor.cc | 6 +-- test/singa/test_tensor_math.cc | 74 ++++++++++++++++++------------------- 20 files changed, 94 insertions(+), 112 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/core/device.h ---------------------------------------------------------------------- diff --git a/include/singa/core/device.h b/include/singa/core/device.h index fc98a23..d2b5b12 100644 --- a/include/singa/core/device.h +++ b/include/singa/core/device.h @@ -23,6 +23,7 @@ #include #include #include +#include #include "singa/singa_config.h" #include "singa/core/common.h" #include "singa/core/memory.h" @@ -75,7 +76,7 @@ class Device { return lang_; } - Device* host() const { return host_;} + std::shared_ptr host() const { return host_;} Context* context(int k) { return &ctx_; @@ -107,7 +108,7 @@ class Device { // SafeQueue op_queue_; // SafeQueue op_log_; /// The host device - Device* host_; + std::shared_ptr host_; // TODO(wangwei) define multiple contexts, one per executor Context ctx_; }; @@ -134,7 +135,7 @@ class CppCPU : public Device { }; /// a singleton CppDevice as the host for all devices. -extern CppCPU defaultDevice; +extern std::shared_ptr defaultDevice; // Implement Device using OpenCL libs. http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index bb8d7f8..8f73047 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -67,8 +67,8 @@ class Tensor { Tensor(); explicit Tensor(Shape &&shape, DataType dtype = kFloat32); explicit Tensor(const Shape &shape, DataType dtype = kFloat32); - Tensor(Shape &&shape, Device *dev, DataType dtype = kFloat32); - Tensor(const Shape &shape, Device *dev, DataType dtype = kFloat32); + Tensor(Shape &&shape, std::shared_ptr dev, DataType dtype = kFloat32); + Tensor(const Shape &shape, std::shared_ptr dev, DataType dtype = kFloat32); /// Copy Tensor to share the internal data. No deep copy. Tensor(const Tensor &from); @@ -80,7 +80,7 @@ class Tensor { /// blob_ is allocated in constructors. Blob *blob() const { return blob_; } - Device *device() const { return device_; } + std::shared_ptr device() const { return device_; } /// Return immutable Tensor values with given type. template @@ -125,7 +125,7 @@ class Tensor { /// Reset the device. /// If the target device is a diff device, then do deep data copy. - void ToDevice(Device *dev); + void ToDevice(std::shared_ptr dev); /// Equivalent to ToDevice(host_dev). 
void ToHost(); @@ -192,7 +192,7 @@ class Tensor { protected: bool transpose_ = false; DataType data_type_ = kFloat32; - Device *device_ = nullptr; + std::shared_ptr device_ = nullptr; /// Note: blob_ is allocated in lazy manner to avoid frequent malloc/free. /// If you want to get an allocated Blob, use blob() instead of blob_. Blob *blob_ = nullptr; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/model/layer.h ---------------------------------------------------------------------- diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h index 82c8edc..ee2b42b 100644 --- a/include/singa/model/layer.h +++ b/include/singa/model/layer.h @@ -125,7 +125,7 @@ class Layer { /// Move the layer (including its parameters and other internal Tensor) onto /// the given device - virtual void ToDevice(Device* device) { + virtual void ToDevice(std::shared_ptr device) { //for (auto p : param_values_) p->ToDevice(device); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/cpp_cpu.cc ---------------------------------------------------------------------- diff --git a/src/core/device/cpp_cpu.cc b/src/core/device/cpp_cpu.cc index 44f614a..6884e35 100644 --- a/src/core/device/cpp_cpu.cc +++ b/src/core/device/cpp_cpu.cc @@ -17,7 +17,7 @@ */ #include "singa/core/device.h" namespace singa { -CppCPU defaultDevice(-1, 1); +std::shared_ptr defaultDevice=std::make_shared(-1, 1); CppCPU::CppCPU(int id, int num_executors, string scheduler, string vm) : Device(id, num_executors, scheduler, vm) { lang_ = kCpp; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/cuda_gpu.cc ---------------------------------------------------------------------- diff --git a/src/core/device/cuda_gpu.cc b/src/core/device/cuda_gpu.cc index d9a0985..4da292f 100644 --- a/src/core/device/cuda_gpu.cc +++ b/src/core/device/cuda_gpu.cc @@ -43,7 +43,6 @@ CudaGPU::~CudaGPU() { } #endif delete pool; - LOG(INFO) << "device has been deleted"; } CudaGPU::CudaGPU(int id, int num_executors, @@ -143,14 +142,10 @@ void* CudaGPU::Malloc(int size) { /// Free cpu memory. void CudaGPU::Free(void* ptr) { - LOG(INFO) << "Cuda free is called"; - LOG(INFO) << "pool pointer" << pool << "\n"; - LOG(INFO) << "pool status:" << ((CnMemPool*)pool)->status; if (ptr != nullptr) { //CUDA_CHECK(cudaFree(ptr)); pool->Free(ptr); } - LOG(INFO) << "free memory is successed"; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/device.cc ---------------------------------------------------------------------- diff --git a/src/core/device/device.cc b/src/core/device/device.cc index 1d3c446..1889339 100644 --- a/src/core/device/device.cc +++ b/src/core/device/device.cc @@ -22,7 +22,7 @@ namespace singa { Device::Device(int id, int num_executors, string scheduler, string vm) : id_(id), num_executors_(num_executors) { // TODO(wangwei) create scheduler and vm. 
- host_ = &defaultDevice; + host_ = defaultDevice; } void Device::Exec(function&& fn, const vector read_blobs, http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/memory/memory.cc ---------------------------------------------------------------------- diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc index c5878a6..304c101 100644 --- a/src/core/memory/memory.cc +++ b/src/core/memory/memory.cc @@ -60,7 +60,6 @@ CnMemPool::~CnMemPool() { initialized = false; } mtx.unlock(); - LOG(INFO) << "cnmem has been freed"; } @@ -70,10 +69,8 @@ void CnMemPool::Malloc(void** ptr, const size_t size) { } void CnMemPool::Free(void* ptr) { - LOG(INFO) << "cnmem free is called !!!!!!!!!!!"; cnmemStatus_t status = cnmemFree(ptr,NULL); CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS) << " " << cnmemGetErrorString(status); - LOG(INFO) << "cnmem free is terminated"; } void CudaMemPool::Malloc(void** ptr, const size_t size) { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 5ae375c..a5b43d8 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -25,29 +25,28 @@ namespace singa { Tensor::~Tensor() { - // LOG(ERROR) << "~"; if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = nullptr; } -Tensor::Tensor() { device_ = &defaultDevice; } +Tensor::Tensor() { device_ = defaultDevice; } Tensor::Tensor(const Shape &shape, DataType dtype) - : data_type_(dtype), device_(&defaultDevice), shape_(shape) { - device_ = &defaultDevice; + : data_type_(dtype), device_(defaultDevice), shape_(shape) { + device_ = defaultDevice; blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } Tensor::Tensor(Shape &&shape, DataType dtype) - : data_type_(dtype), device_(&defaultDevice), shape_(shape) { - device_ = &defaultDevice; + : data_type_(dtype), device_(defaultDevice), shape_(shape) { + device_ = defaultDevice; blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(const Shape &shape, Device *device, DataType dtype) +Tensor::Tensor(const Shape &shape, std::shared_ptr device, DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(Shape &&shape, Device *device, DataType dtype) +Tensor::Tensor(Shape &&shape, std::shared_ptr device, DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } @@ -104,7 +103,7 @@ void Tensor::AsType(DataType type) { } } -void Tensor::ToDevice(Device *dst) { +void Tensor::ToDevice(std::shared_ptr dst) { // TODO(wangwei) the comparison is very strict. May compare against device ID? 
if (device_ != dst) { Tensor tmp(shape_, dst, data_type_); @@ -234,7 +233,7 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num, CHECK_GE(src.MemSize(), src_offset + nBytes); CHECK_GE(dst->MemSize(), dst_offset + nBytes); - Device *src_dev = src.device(), *dst_dev = dst->device(); + std::shared_ptr src_dev = src.device(), dst_dev = dst->device(); Blob *from = src.blob(), *to = dst->blob(); if (dst_dev->lang() != src_dev->lang()) { // let the none cpp device conduct copy op http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/batchnorm.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc index bcd0870..1e6c39b 100644 --- a/src/model/layer/batchnorm.cc +++ b/src/model/layer/batchnorm.cc @@ -44,7 +44,7 @@ void BatchNorm::Setup(const LayerConf& conf) { param_values_.push_back(&runningVariance_); } -void BatchNorm::ToDevice(Device* device) { +void BatchNorm::ToDevice(std::shared_ptr device) { bnScale_.ToDevice(device); bnBias_.ToDevice(device); dbnScale_.ToDevice(device); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/batchnorm.h ---------------------------------------------------------------------- diff --git a/src/model/layer/batchnorm.h b/src/model/layer/batchnorm.h index 0255179..83f143d 100644 --- a/src/model/layer/batchnorm.h +++ b/src/model/layer/batchnorm.h @@ -67,7 +67,7 @@ class BatchNorm : public Layer { runningVariance_.ResetLike(x); runningVariance_.CopyData(x); } - virtual void ToDevice(Device* device) override; + virtual void ToDevice(std::shared_ptr device) override; protected: float factor_; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dense.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc index b349787..d47c1db 100644 --- a/src/model/layer/dense.cc +++ b/src/model/layer/dense.cc @@ -79,7 +79,7 @@ const std::pair> Dense::Backward(int flag, return std::make_pair(dx, param_grad); } -void Dense::ToDevice(Device *device) { +void Dense::ToDevice(std::shared_ptr device) { weight_.ToDevice(device); bias_.ToDevice(device); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dense.h ---------------------------------------------------------------------- diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h index a5a6f66..49cb986 100644 --- a/src/model/layer/dense.h +++ b/src/model/layer/dense.h @@ -40,7 +40,7 @@ class Dense : public Layer { const std::pair> Backward(int flag, const Tensor& grad) override; - void ToDevice(Device* device) override; + void ToDevice(std::shared_ptr device) override; size_t num_output() const { return hdim_; } size_t num_input() const { return vdim_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dropout.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc index c2c97be..695008e 100644 --- a/src/model/layer/dropout.cc +++ b/src/model/layer/dropout.cc @@ -52,7 +52,7 @@ const std::pair> Dropout::Backward(int flag, return std::make_pair(input_grad, param_grad); } -void Dropout::ToDevice(Device* device) { +void Dropout::ToDevice(std::shared_ptr device) { Layer::ToDevice(device); mask_.ToDevice(device); } 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dropout.h ---------------------------------------------------------------------- diff --git a/src/model/layer/dropout.h b/src/model/layer/dropout.h index 5efaf6a..d5da79c 100644 --- a/src/model/layer/dropout.h +++ b/src/model/layer/dropout.h @@ -43,7 +43,7 @@ class Dropout : public Layer { const std::pair> Backward(int flag, const Tensor& grad) override; - void ToDevice(Device* device) override; + void ToDevice(std::shared_ptr device) override; float dropout_ratio() const { return dropout_ratio_; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_dense.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc index 052d0e8..7ed4d33 100644 --- a/test/singa/test_dense.cc +++ b/test/singa/test_dense.cc @@ -66,7 +66,6 @@ TEST(Dense, ForwardCpp) { dense.set_bias(bias); singa::Tensor out1 = dense.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); const float *outptr1 = out1.data(); EXPECT_EQ(9u, out1.Size()); for (int i = 0; i < 3; i++) @@ -76,7 +75,6 @@ TEST(Dense, ForwardCpp) { outptr1[i * 3 + j]); } #endif // USE_CBLAS -#ifdef USE_CUDA TEST(Dense, BackwardCpp) { Dense dense; @@ -89,7 +87,6 @@ TEST(Dense, BackwardCpp) { const size_t batchsize = 3, vdim = 2, hdim = 3; const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - singa::CudaGPU cuda(0, 1); singa::Tensor in(singa::Shape{batchsize, vdim}); in.CopyDataFromHostPtr(x, batchsize * vdim); @@ -114,7 +111,6 @@ TEST(Dense, BackwardCpp) { grad.CopyDataFromHostPtr(dy, batchsize * hdim); const auto ret = dense.Backward(singa::kTrain, grad); - singa::CppCPU host(0, 1); singa::Tensor in_grad = ret.first; singa::Tensor dweight = ret.second.at(0); singa::Tensor dbias = ret.second.at(1); @@ -139,7 +135,6 @@ TEST(Dense, BackwardCpp) { for (int i = 0; i < 3; i++) EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]); } -#endif #ifdef USE_CUDA TEST(Dense, ForwardCuda) { @@ -154,25 +149,24 @@ TEST(Dense, ForwardCuda) { const size_t batchsize = 3, vdim = 2, hdim = 3; const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda); + auto cuda = std::make_shared(0, 1); + singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); in.CopyDataFromHostPtr(x, batchsize * vdim); // set weight const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; - singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda); + singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); weight.CopyDataFromHostPtr(we, hdim * vdim); const float bia[hdim] = {1.0f, 1.0f, 1.0f}; - singa::Tensor bias(singa::Shape{hdim}, &cuda); + singa::Tensor bias(singa::Shape{hdim}, cuda); bias.CopyDataFromHostPtr(bia, hdim); dense.set_weight(weight); dense.set_bias(bias); singa::Tensor out1 = dense.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); - out1.ToDevice(&host); + out1.ToHost(); const float *outptr1 = out1.data(); EXPECT_EQ(9u, out1.Size()); for (int i = 0; i < 3; i++) @@ -193,17 +187,17 @@ TEST(Dense, BackwardCuda) { const size_t batchsize = 3, vdim = 2, hdim = 3; const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda); + auto cuda = std::make_shared(0, 1); + singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); in.CopyDataFromHostPtr(x, batchsize * 
vdim); // set weight const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; - singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda); + singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); weight.CopyDataFromHostPtr(we, hdim * vdim); const float bia[hdim] = {1.0f, 1.0f, 1.0f}; - singa::Tensor bias(singa::Shape{hdim}, &cuda); + singa::Tensor bias(singa::Shape{hdim}, cuda); bias.CopyDataFromHostPtr(bia, hdim); dense.set_weight(weight); @@ -214,15 +208,14 @@ TEST(Dense, BackwardCuda) { // grad const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f}; - singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda); + singa::Tensor grad(singa::Shape{batchsize, hdim}, cuda); grad.CopyDataFromHostPtr(dy, batchsize * hdim); const auto ret = dense.Backward(singa::kTrain, grad); - singa::CppCPU host(0, 1); singa::Tensor in_grad = ret.first; singa::Tensor dweight = ret.second.at(0); singa::Tensor dbias = ret.second.at(1); - in_grad.ToDevice(&host); + in_grad.ToHost(); const float *dx = in_grad.data(); EXPECT_EQ(6u, in_grad.Size()); for (int i = 0; i < 3; i++) @@ -231,7 +224,7 @@ TEST(Dense, BackwardCuda) { (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + dy[i * 3 + 2] * we[2 * 2 + j]), dx[i * 2 + j]); - dweight.ToDevice(&host); + dweight.ToHost(); const float *dweightx = dweight.data(); EXPECT_EQ(6u, dweight.Size()); for (int i = 0; i < 3; i++) @@ -240,7 +233,7 @@ TEST(Dense, BackwardCuda) { (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]); - dbias.ToDevice(&host); + dbias.ToHost(); const float *dbiasx = dbias.data(); EXPECT_EQ(3u, dbias.Size()); for (int i = 0; i < 3; i++) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_memory.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_memory.cc b/test/singa/test_memory.cc index f5e464d..90fc99a 100644 --- a/test/singa/test_memory.cc +++ b/test/singa/test_memory.cc @@ -75,7 +75,7 @@ TEST(MemPool, CompareCudaCnmem) { singa::CnMemPool cnPool; cnPool.InitPool(); - int numOfTests = 10000; + int numOfTests = 5000; int allocSize = 1000000U; struct timeval start,end; double t1,t2; @@ -93,7 +93,7 @@ TEST(MemPool, CompareCudaCnmem) { t1 = start.tv_sec * 1000 + start.tv_usec/1000; t2 = end.tv_sec * 1000 + end.tv_usec/1000; - LOG(INFO) << "cnmem time: " << t2-t1 << " ms" << std::endl; + LOG(INFO) << "cnmem memory time: " << t2-t1 << " ms" << std::endl; pool = &cudaPool; gettimeofday(&start,NULL); @@ -106,6 +106,6 @@ TEST(MemPool, CompareCudaCnmem) { t1 = start.tv_sec * 1000 + start.tv_usec/1000; t2 = end.tv_sec * 1000 + end.tv_usec/1000; - LOG(INFO) << "cuda time: " << t2-t1 << " ms" << std::endl; + LOG(INFO) << "cuda memory time: " << t2-t1 << " ms" << std::endl; } #endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_mse.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc index 7c6066e..d2c5125 100644 --- a/test/singa/test_mse.cc +++ b/test/singa/test_mse.cc @@ -69,9 +69,9 @@ TEST_F(TestMSE, CppBackward) { #ifdef USE_CUDA TEST_F(TestMSE, CudaForward) { singa::MSE* mse = new singa::MSE(); - singa::CudaGPU dev; - p.ToDevice(&dev); - t.ToDevice(&dev); + auto dev = std::make_shared(); + p.ToDevice(dev); + t.ToDevice(dev); Tensor loss = mse->Forward(p, t); loss.ToHost(); @@ -85,18 +85,15 @@ TEST_F(TestMSE, CudaForward) { } 
EXPECT_FLOAT_EQ(ldat[i], 0.5 * l); } - LOG(INFO) << "Before delete pxxxxxxxxxxxxxxxxxxxxxxxx"; p.ToHost(); - LOG(INFO) << "Before delete tyyyyyyyyyyyyyyyyyyyyyyy"; t.ToHost(); - LOG(INFO) << "terminate-xxxxxxxxxxxxxxxxxx-"; - delete mse; } + TEST_F(TestMSE, CudaBackward) { singa::MSE mse; - singa::CudaGPU dev; - p.ToDevice(&dev); - t.ToDevice(&dev); + auto dev = std::make_shared(); + p.ToDevice(dev); + t.ToDevice(dev); mse.Forward(p, t); Tensor grad = mse.Backward(); grad.ToHost(); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_sgd.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_sgd.cc b/test/singa/test_sgd.cc index 71ab15e..3b04ab6 100644 --- a/test/singa/test_sgd.cc +++ b/test/singa/test_sgd.cc @@ -88,8 +88,8 @@ TEST(SGD, ApplyWithoutMomentumCuda) { const float v[4] = {0.1, 0.2, 0.3, 0.4}; const float g[4] = {0.1, 0.1, 0.1, 0.1}; - singa::CudaGPU dev; - singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev); + auto dev = std::make_shared(); + singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev); value.CopyDataFromHostPtr(v, 4); grad.CopyDataFromHostPtr(g, 4); @@ -124,8 +124,8 @@ TEST(SGD, ApplyWithMomentumCuda) { const float v[4] = {0.1, 0.2, 0.3, 0.4}; const float g[4] = {0.01, 0.02, 0.03, 0.04}; - singa::CudaGPU dev; - singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev); + auto dev = std::make_shared(); + singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev); value.CopyDataFromHostPtr(v, 4); grad.CopyDataFromHostPtr(g, 4); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_tensor.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc index bd039ad..c351174 100644 --- a/test/singa/test_tensor.cc +++ b/test/singa/test_tensor.cc @@ -59,10 +59,10 @@ TEST(TensorClass, AsType) { TEST(TensorClass, ToDevice) { Tensor t(Shape{2,3}); - EXPECT_EQ(static_cast(&singa::defaultDevice), t.device()); - singa::CppCPU *dev = new singa::CppCPU(0, 1); + EXPECT_EQ(singa::defaultDevice, t.device()); + auto dev = std::make_shared(0, 1); t.ToDevice(dev); - EXPECT_NE(static_cast(&singa::defaultDevice), t.device()); + EXPECT_NE(singa::defaultDevice, t.device()); } TEST(TensorClass, CopyDataFromHostPtr) { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_tensor_math.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc index b18e465..0f998c0 100644 --- a/test/singa/test_tensor_math.cc +++ b/test/singa/test_tensor_math.cc @@ -255,10 +255,10 @@ TEST_F(TestTensorMath, SumColumnsCpp) { #ifdef USE_CUDA TEST_F(TestTensorMath, MultCuda) { const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f}; - singa::CudaGPU dev; - Tensor t(Shape{2, 2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2, 2}, dev); t.CopyDataFromHostPtr(x, 4); - d.ToDevice(&dev); + d.ToDevice(dev); d.CopyDataFromHostPtr(dat1, 6); Tensor C = Mult(d, t); C.ToHost(); @@ -274,7 +274,7 @@ TEST_F(TestTensorMath, MultCuda) { } const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f}; - Tensor s(Shape{4, 2}, &dev); + Tensor s(Shape{4, 2}, dev); s.CopyDataFromHostPtr(y, 8); Tensor D = Mult(d, s.T()); D.ToHost(); @@ -288,11 +288,11 @@ TEST_F(TestTensorMath, MultCuda) { EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp); } } - Tensor p(Shape{4, 1}, &dev); + 
Tensor p(Shape{4, 1}, dev); p.CopyDataFromHostPtr(x, 4); - Tensor q(Shape{1, 4}, &dev); + Tensor q(Shape{1, 4}, dev); q.SetValue(1.0f); - Tensor o(Shape{4, 4}, &dev); + Tensor o(Shape{4, 4}, dev); Mult(p, q, &o); o.ToHost(); @@ -308,11 +308,11 @@ TEST_F(TestTensorMath, MultCuda) { TEST_F(TestTensorMath, AddColumnCuda) { const float x[3] = {1.0f, 2.0f, 3.0f}; - singa::CudaGPU dev; - Tensor t(Shape{3}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{3}, dev); t.CopyDataFromHostPtr(x, 3); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); AddColumn(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -326,11 +326,11 @@ TEST_F(TestTensorMath, AddColumnCuda) { TEST_F(TestTensorMath, SubColumnCuda) { const float x[3] = {1.0f, 2.0f, 3.0f}; - singa::CudaGPU dev; - Tensor t(Shape{3}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{3}, dev); t.CopyDataFromHostPtr(x, 3); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); SubColumn(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -357,11 +357,11 @@ TEST_F(TestTensorMath, MultColumnCpp) { #ifdef USE_CUDA TEST_F(TestTensorMath, MultColumnCuda) { const float x[3] = {1.0f, 2.0f, 3.0f}; - singa::CudaGPU dev; - Tensor t(Shape{3}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{3}, dev); t.CopyDataFromHostPtr(x, 3); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); MultColumn(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -373,11 +373,11 @@ TEST_F(TestTensorMath, MultColumnCuda) { } TEST_F(TestTensorMath, DivColumnCuda) { const float x[3] = {1.0f, 2.0f, 3.0f}; - singa::CudaGPU dev; - Tensor t(Shape{3}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{3}, dev); t.CopyDataFromHostPtr(x, 3); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); DivColumn(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -389,11 +389,11 @@ TEST_F(TestTensorMath, DivColumnCuda) { } TEST_F(TestTensorMath, AddRowCuda) { const float x[2] = {1.1f, 2.1f}; - singa::CudaGPU dev; - Tensor t(Shape{2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2}, dev); t.CopyDataFromHostPtr(x, 2); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); AddRow(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -405,11 +405,11 @@ TEST_F(TestTensorMath, AddRowCuda) { } TEST_F(TestTensorMath, SubRowCuda) { const float x[2] = {1.1f, 2.1f}; - singa::CudaGPU dev; - Tensor t(Shape{2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2}, dev); t.CopyDataFromHostPtr(x, 2); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); SubRow(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -421,11 +421,11 @@ TEST_F(TestTensorMath, SubRowCuda) { } TEST_F(TestTensorMath, MultRowCuda) { const float x[2] = {1.1f, 2.1f}; - singa::CudaGPU dev; - Tensor t(Shape{2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2}, dev); t.CopyDataFromHostPtr(x, 2); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); MultRow(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -452,11 +452,11 @@ TEST_F(TestTensorMath, DivRowCpp) { #ifdef USE_CUDA TEST_F(TestTensorMath, DivRowCuda) { const float x[2] = {1.1f, 2.1f}; - singa::CudaGPU dev; - Tensor t(Shape{2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2}, dev); t.CopyDataFromHostPtr(x, 2); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); DivRow(t, &d); d.ToHost(); const float *xptr = d.data(); @@ -467,10 +467,10 @@ TEST_F(TestTensorMath, DivRowCuda) { 
} } TEST_F(TestTensorMath, SumRowsCuda) { - singa::CudaGPU dev; - Tensor t(Shape{2}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{2}, dev); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); SumRows(d, &t); t.ToHost(); const float *tptr = t.data(); @@ -484,10 +484,10 @@ TEST_F(TestTensorMath, SumRowsCuda) { d.ToHost(); } TEST_F(TestTensorMath, SumColumnCuda) { - singa::CudaGPU dev; - Tensor t(Shape{3}, &dev); + auto dev = std::make_shared(); + Tensor t(Shape{3}, dev); d.CopyDataFromHostPtr(dat1, 6); - d.ToDevice(&dev); + d.ToDevice(dev); SumColumns(d, &t); t.ToHost(); const float *tptr = t.data();
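
The pattern this commit applies throughout the tree is: raw `Device*` handles (the global `defaultDevice`, `Tensor::device_`, `Layer::ToDevice` parameters) become `std::shared_ptr<Device>`, and the tests stop stack-allocating devices in favour of `std::make_shared`. The following is a minimal standalone sketch of that ownership model, not SINGA's actual API; the class bodies and member names are simplified stand-ins, with only the shapes of `host()`, `defaultDevice`, and `ToDevice()` mirroring the patch.

```cpp
// Sketch of the Device*-to-shared_ptr migration: a Device now stays alive
// for as long as any Tensor (or another Device's host_ alias) co-owns it.
#include <iostream>
#include <memory>

class Device {
 public:
  explicit Device(int id) : id_(id) {}
  virtual ~Device() = default;          // safe deletion through shared_ptr<Device>
  int id() const { return id_; }
  // Was: Device* host() const;  now shared ownership of the host device.
  std::shared_ptr<Device> host() const { return host_; }
  void set_host(std::shared_ptr<Device> h) { host_ = std::move(h); }

 private:
  int id_;
  std::shared_ptr<Device> host_;
};

class CppCPU : public Device {
 public:
  explicit CppCPU(int id) : Device(id) {}
};

// Was: extern CppCPU defaultDevice;  now a shared_ptr created once.
std::shared_ptr<Device> defaultDevice = std::make_shared<CppCPU>(-1);

// A Tensor-like holder keeps its device alive via shared ownership
// (was: Device* device_ = nullptr;).
class Tensor {
 public:
  explicit Tensor(std::shared_ptr<Device> dev = defaultDevice)
      : device_(std::move(dev)) {}
  std::shared_ptr<Device> device() const { return device_; }
  void ToDevice(std::shared_ptr<Device> dst) {
    if (device_ != dst) device_ = std::move(dst);  // deep data copy omitted
  }

 private:
  std::shared_ptr<Device> device_;
};

int main() {
  Tensor t;                                // lives on defaultDevice
  auto gpu = std::make_shared<Device>(0);  // stand-in for a GPU device
  gpu->set_host(defaultDevice);            // mirrors host_ = defaultDevice
  t.ToDevice(gpu);                         // tensor now co-owns the device
  std::cout << "tensor device id: " << t.device()->id() << "\n";
  // gpu may go out of scope here; the Tensor keeps the Device alive.
  return 0;
}
```

This is also why the tests above switch from stack-allocated devices (e.g. `singa::CudaGPU cuda(0, 1)` plus `&cuda`) to `std::make_shared`: with shared ownership, tensors that outlive the scope creating the device no longer hold a dangling pointer, and the explicit `LOG(INFO)` traces around device/memory teardown that the commit removes are no longer needed to debug destruction order.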