From: jxie@apache.org
To: commits@mxnet.apache.org
Reply-To: dev@mxnet.incubator.apache.org
Date: Tue, 15 May 2018 22:44:31 +0000
Subject: [incubator-mxnet] branch master updated: handle fallback correctly for write inplace when the array is MKLDNN. (#10651)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

The following commit(s) were added to refs/heads/master by this push:
     new bae1332  handle fallback correctly for write inplace when the array is MKLDNN. (#10651)
bae1332 is described below

commit bae13329d0c1d510223b3f453e44ec2394323540
Author: Da Zheng
AuthorDate: Tue May 15 15:44:23 2018 -0700

    handle fallback correctly for write inplace when the array is MKLDNN. (#10651)

    * handle writeinplace correctly for mkldnn arrays.

    * Add unit tests.

    * Fix a bug in mkldnn copy.

    * Fix a bug in ndarray copy.

    * Verify results.
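The gist of the change: when an operator falls back to a default-storage implementation and an output NDArray holds an MKLDNN layout, the fallback path allocates a temporary default-layout output buffer. The input and the output are then no longer the same memory, so a kWriteInplace request would let the operator assume the result is already in place and skip the write; the request therefore has to be rewritten to kWriteTo before the operator runs. Below is a minimal standalone sketch of that failure mode and the fix; ToyArray, FallbackCompute, and the layout flag are hypothetical stand-ins for illustration, not MXNet's actual types.

// Minimal standalone sketch of the fallback problem this commit fixes.
// ToyArray and the helper names are hypothetical, not MXNet's actual types.
#include <cassert>
#include <vector>

enum OpReqType { kWriteTo, kWriteInplace };

struct ToyArray {
  std::vector<float> data;
  bool mkldnn_layout = false;  // stands in for NDArray::IsMKLDNNData()
};

// A "fallback" operator body that only understands the default layout.
void FallbackCompute(const ToyArray &in, OpReqType req, ToyArray *out) {
  // With kWriteInplace the operator may assume in and out alias each other
  // and skip the copy entirely; that assumption breaks once a temporary
  // default-layout buffer is swapped in for the MKLDNN output.
  if (req == kWriteInplace) return;  // nothing to do: input == output
  out->data = in.data;
}

int main() {
  ToyArray arr;
  arr.data = {1, 2, 3};
  arr.mkldnn_layout = true;

  OpReqType req = kWriteInplace;
  ToyArray tmp;  // temporary default-layout output created by the fallback
  if (req == kWriteInplace && arr.mkldnn_layout)
    req = kWriteTo;  // the fix: input and temp output no longer alias

  FallbackCompute(arr, req, &tmp);
  assert(tmp.data == arr.data);  // result must be written, not skipped
  return 0;
}

With req left as kWriteInplace, FallbackCompute would return without writing anything and the assert would fire; rewriting the request to kWriteTo forces the copy into the temporary buffer, whose contents are later cast back to the real output.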
---
 src/common/exec_utils.h               |  12 ++-
 src/executor/attach_op_execs_pass.cc  |   7 +-
 src/imperative/imperative_utils.h     |  10 +-
 src/ndarray/ndarray.cc                |   5 +-
 src/operator/nn/mkldnn/mkldnn_copy.cc |   8 +-
 tests/cpp/operator/mkldnn.cc          | 192 +++++++++++++++++++++++++++++++++-
 6 files changed, 217 insertions(+), 17 deletions(-)

diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 4881e0f..731d03d 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -78,8 +78,8 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
 }
 
 inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
-                                 const std::vector<OpReqType> &req,
                                  const std::vector<NDArray> *bufs,
+                                 std::vector<OpReqType> *req,
                                  std::vector<TBlob> *blobs,
                                  std::vector<NDArray> *temp_src,
                                  std::vector<NDArray> *temp_dst) {
@@ -88,6 +88,12 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
     auto& nd = src[i];
     bool is_default = nd.storage_type() == kDefaultStorage;
 #if MXNET_USE_MKLDNN == 1
+    if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
+      // If it's write inplace and the output array doesn't use the default
+      // layout, we'll generate a temporary output array below, which means
+      // the input array and the output array are no longer the same array.
+      // we should change the request type.
+      req->at(i) = kWriteTo;
     // We have to make sure it's default storage and default layout.
     is_default = nd.IsDefaultData();
 #endif
@@ -117,9 +123,9 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
  */
 inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
                                    const std::vector<NDArray> &ndoutputs,
-                                   const std::vector<OpReqType> &req,
                                    const std::vector<NDArray> *in_bufs,
                                    const std::vector<NDArray> *out_bufs,
+                                   std::vector<OpReqType> *req,
                                    std::vector<TBlob> *input_blobs,
                                    std::vector<TBlob> *output_blobs,
                                    std::vector<NDArray> *pre_temp_src,
@@ -132,7 +138,7 @@ inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
   SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
                       in_temp_idx_map);
   // populate output blobs
-  SetupDefaultBlobsOut(ndoutputs, req, out_bufs, output_blobs, post_temp_dst,
+  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
                        post_temp_src);
   // add mutable inputs to post temp list
   for (const auto idx : mutate_idx) {
diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc
index f7ac772..697e486 100644
--- a/src/executor/attach_op_execs_pass.cc
+++ b/src/executor/attach_op_execs_pass.cc
@@ -78,7 +78,8 @@ class StorageFallbackOpExecutor : public OpExecutor {
     pre_temp_src_.clear(); pre_temp_dst_.clear();
     post_temp_src_.clear(); post_temp_dst_.clear();
     in_temp_idx_map_.clear();
-    SetupDefaultBlobsInOut(in_array, out_array, req, &pre_temp_buf_, &post_temp_buf_,
+    tmp_req = req;
+    SetupDefaultBlobsInOut(in_array, out_array, &pre_temp_buf_, &post_temp_buf_, &req,
                            &in_data_, &out_data_,
                            &pre_temp_src_, &pre_temp_dst_,
                            &post_temp_src_, &post_temp_dst_,
@@ -89,8 +90,12 @@ class StorageFallbackOpExecutor : public OpExecutor {
   // storage fallback after fcompute is completed
   void PostFCompute(bool is_gpu) {
     common::CastNonDefaultStorage(post_temp_src_, post_temp_dst_, op_ctx, is_gpu);
+    req = tmp_req;
   }
 
+  // output requirement on each output array.
+  // This temporarily saves the original output requirements.
+  std::vector<OpReqType> tmp_req;
   // default storage tensor blobs for fcompute
   std::vector<TBlob> in_data_, out_data_;
   // These are NDArray buffers for cast storage.
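One subtlety in the executor change above: req is a member of OpExecutor that is set up once and reused on every invocation of the node, and SetupDefaultBlobsInOut now mutates it in place. The original requests are therefore saved into tmp_req before the setup call and restored in PostFCompute. A small self-contained sketch of this save/restore pattern follows; Executor and its members are simplified stand-ins for this illustration, not the real OpExecutor.

#include <cassert>
#include <vector>

enum OpReqType { kWriteTo, kWriteInplace };

// Simplified stand-in for StorageFallbackOpExecutor: req persists across runs.
struct Executor {
  std::vector<OpReqType> req{kWriteInplace};
  std::vector<OpReqType> tmp_req;  // saves the original output requirements

  void PreFCompute(bool output_is_mkldnn) {
    tmp_req = req;  // keep the caller's requests
    // the setup step may downgrade in-place writes for MKLDNN outputs
    for (auto &r : req)
      if (r == kWriteInplace && output_is_mkldnn) r = kWriteTo;
  }
  void PostFCompute() {
    req = tmp_req;  // restore so the next invocation sees the original
  }
};

int main() {
  Executor exec;
  exec.PreFCompute(true);
  assert(exec.req[0] == kWriteTo);       // downgraded for this run only
  exec.PostFCompute();
  assert(exec.req[0] == kWriteInplace);  // original request is back
  return 0;
}

Without the restore, a request downgraded to kWriteTo on one forward pass would stay downgraded on every later pass, even ones where writing in place is valid.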
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 10a011e..d7bb37b 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -373,8 +373,9 @@ inline void PushFCompute(const FCompute& fn,
 #if MXNET_USE_MKLDNN == 1
       InvalidateOutputs(outputs, req);
 #endif
+      std::vector<OpReqType> tmp_req = req;
       // setup blobs
-      SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+      SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
                              &input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
                              &post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
       // setup context
@@ -382,7 +383,7 @@ inline void PushFCompute(const FCompute& fn,
       bool is_gpu = ctx.dev_mask() == gpu::kDevMask;
       // pre-fcompute fallback, cast to default storage type
       CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
-      fn(attrs, opctx, input_blobs, req, output_blobs);
+      fn(attrs, opctx, input_blobs, tmp_req, output_blobs);
       // post-fcompute fallback, cast to original storage type
       CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
       if (is_gpu) {
@@ -492,15 +493,16 @@ inline void PushOperator(const OpStatePtr& state,
 #if MXNET_USE_MKLDNN == 1
         InvalidateOutputs(outputs, req);
 #endif
+        std::vector<OpReqType> tmp_req = req;
         // populate input blobs and output blobs
-        SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+        SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
                                &input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
                                &post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
         // setup contexts
         bool is_gpu = rctx.get_ctx().dev_mask() == gpu::kDevMask;
         // pre-fcompute fallback
         CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
-        fcompute(state, opctx, input_blobs, req, output_blobs);
+        fcompute(state, opctx, input_blobs, tmp_req, output_blobs);
         // post-fcompute fallback, cast to original storage type, if necessary
         CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
         if (is_gpu && exec_type == ExecType::kSync) {
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 507b783..d87e8bc 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1118,9 +1118,8 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext ctx) {
                              to_mem->get_primitive_desc().get_size());
       memcpy(to_mem->get_data_handle(), from_mem->get_data_handle(), size);
     } else {
-      std::vector<mkldnn::primitive> net;
-      net.push_back(mkldnn::reorder(*from_mem, *to_mem));
-      mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
+      const_cast<NDArray &>(to).CopyFrom(*from_mem);
+      MKLDNNStream::Get()->Submit();
     }
   } else {
     // In this case, one of the NDArray isn't supported by MKLDNN, we need
diff --git a/src/operator/nn/mkldnn/mkldnn_copy.cc b/src/operator/nn/mkldnn/mkldnn_copy.cc
index 4bfb7fa..75e51af 100644
--- a/src/operator/nn/mkldnn/mkldnn_copy.cc
+++ b/src/operator/nn/mkldnn/mkldnn_copy.cc
@@ -35,7 +35,13 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
                 const NDArray &in_data, const OpReqType &req,
                 const NDArray &out_data) {
   TmpMemMgr::Get()->Init(ctx.requested[0]);
-  auto in_mem = in_data.GetMKLDNNData();
+
+  // If the input data is a view of an MKLDNN array, we should create a new
+  // NDArray with reordered data.
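The MKLDNNCopy change above reorders a view of an MKLDNN array to the default layout before taking its memory handle. The reason: a view is essentially a byte offset into the parent's buffer, and that offset only selects the right elements when the buffer is stored in the default row-major layout. The toy program below illustrates the hazard; the two-row "blocked" layout it uses is invented for this illustration and is not MKL-DNN's actual blocking scheme.

// Why MKLDNNCopy reorders views first: a slice is just an offset into the
// underlying buffer, which is only meaningful in the default (row-major)
// layout. This toy program stores a 4x2 matrix row-major and in a made-up
// "blocked" layout (2-row blocks, rows interleaved per column); taking
// rows 1..2 by offset works for the former and reads garbage for the latter.
#include <cstdio>

int main() {
  const int rows = 4, cols = 2;
  float row_major[rows * cols], blocked[rows * cols];
  for (int r = 0; r < rows; r++)
    for (int c = 0; c < cols; c++) {
      float v = r * cols + c;
      row_major[r * cols + c] = v;
      // 2-row blocks: the two rows of a block are interleaved per column.
      int blk = r / 2, in_blk = r % 2;
      blocked[(blk * cols + c) * 2 + in_blk] = v;
    }
  // A "view" of rows 1..2 taken as a flat offset into each buffer:
  const float *view_rm = row_major + 1 * cols;
  const float *view_bl = blocked + 1 * cols;
  printf("row-major view: %g %g %g %g\n",
         view_rm[0], view_rm[1], view_rm[2], view_rm[3]);  // 2 3 4 5
  printf("blocked view:   %g %g %g %g\n",
         view_bl[0], view_bl[1], view_bl[2], view_bl[3]);  // wrong elements
  return 0;
}

In the row-major buffer the offset view yields rows 1 and 2 as expected; in the blocked buffer the same offset picks up interleaved elements from the wrong rows, which is why the view must be reordered to the default layout first.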
+  NDArray data = in_data;
+  if (data.IsMKLDNNData() && data.IsView())
+    data = data.Reorder2Default();
+  auto in_mem = data.GetMKLDNNData();
   if (req == kAddTo) {
     TmpMemMgr::Get()->Init(ctx.requested[0]);
     // We should try and force the output memory has the same format
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index c20cd75..bbff531 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -26,6 +26,7 @@
 #if MXNET_USE_MKLDNN == 1
 
 #include "gtest/gtest.h"
+#include "mxnet/imperative.h"
 #include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
 
 using namespace mxnet;
@@ -97,12 +98,18 @@ static void InitArray(NDArray *arr) {
 }
 
 // Init arrays with the specified layout.
-static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd) {
+static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd,
+                            bool is_rand = false) {
   const TBlob &blob = arr->data();
   mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
   size_t size = blob.Size();
-  for (size_t i = 0; i < size; i++)
-    data[i] = i;
+  if (is_rand) {
+    for (size_t i = 0; i < size; i++)
+      data[i] = std::rand();
+  } else {
+    for (size_t i = 0; i < size; i++)
+      data[i] = i;
+  }
   arr->MKLDNNDataReorderAsync(pd);
   arr->WaitToRead();
 }
@@ -206,7 +213,7 @@ static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, int dtype) {
 }
 
 struct TestArrayShapes {
-  std::vector<TShape> shapes;
+  std::vector<nnvm::TShape> shapes;
   std::vector<mkldnn::memory::primitive_desc> pds;
 };
@@ -239,7 +246,7 @@ static TestArrayShapes GetTestArrayShapes() {
   {
     // 4D
     TShape s1(4);
-    s1[0] = 1; s1[1] = 96; s1[2] = 54; s1[3] = 54;
+    s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54;
     shapes.push_back(s1);
     pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
 
@@ -332,4 +339,179 @@ TEST(MKLDNN_NDArray, GetDataReorder) {
   }
 }
 
+struct OpAttrs {
+  nnvm::NodeAttrs attrs;
+  std::vector<DispatchMode> dispatches;
+};
+
+OpAttrs GetCopyOp() {
+  OpAttrs attrs;
+  attrs.attrs.op = Op::Get("_copy");
+  attrs.dispatches.resize(2);
+  attrs.dispatches[0] = DispatchMode::kFCompute;
+  attrs.dispatches[1] = DispatchMode::kFComputeEx;
+  return attrs;
+}
+
+OpAttrs GetLeakyReluOp() {
+  OpAttrs attrs;
+  attrs.attrs.op = Op::Get("LeakyReLU");
+  attrs.dispatches.resize(1);
+  attrs.dispatches[0] = DispatchMode::kFCompute;
+  return attrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ *    dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ *    type of NDArrays only exists for weight arrays. I don't think we should
+ *    pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ * 4. Reshaped/sliced NDArray
+ * 5. Reshaped/sliced NDArray with MKLDNN layout (reshape/slice from Normal NDArray
+ *    with MKLDNN layout)
+ * 6. Reshaped/sliced NDArray with MKLDNN layout whose MKLDNN memory may have
+ *    different dimensions from the NDArray (result of MKLDNNDataReorderAsync).
+ *    However, this type of NDArrays only exists for weight arrays. I don't think
+ *    we should pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ *
+ */
+std::vector<NDArray> GetTestInputArrays() {
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<nnvm::TShape> shapes = tas.shapes;
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+  std::vector<NDArray> in_arrs;
+  for (auto shape : shapes) {
+    in_arrs.emplace_back(shape, Context());
+    InitArray(&in_arrs.back());
+    for (auto pd : pds) {
+      if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+        continue;
+
+      in_arrs.emplace_back(shape, Context());
+      InitMKLDNNArray(&in_arrs.back(), pd);
+
+      // Get a sliced version.
+      NDArray arr(shape, Context());
+      InitMKLDNNArray(&arr, pd);
+      arr = arr.Slice(1, arr.shape()[0] - 1);
+      in_arrs.emplace_back(arr);
+    }
+  }
+  return in_arrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ *    dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ *    type of NDArrays only exists for weight arrays. I don't think we should
+ *    pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ * 4. Reused NDArray (this is created by the MXNet executor). This type of
+ *    NDArrays can only be used as output arrays.
+ */
+std::vector<NDArray> GetTestOutputArrays(const TShape &shape,
+                                         const std::vector<mkldnn::memory::primitive_desc> &pds) {
+  std::vector<NDArray> in_arrs;
+  in_arrs.emplace_back(shape, Context());
+  InitArray(&in_arrs.back());
+
+  // Get a reused version.
+  nnvm::TShape s(1);
+  s[0] = shape.Size();
+  NDArray arr(s, Context());
+  arr = arr.AsArray(shape, arr.dtype());
+  InitArray(&arr);
+  in_arrs.emplace_back(arr);
+
+  for (auto pd : pds) {
+    if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+      continue;
+
+    in_arrs.emplace_back(shape, Context());
+    InitMKLDNNArray(&in_arrs.back(), pd, true);
+
+    // Get a reused version.
+    nnvm::TShape s(1);
+    s[0] = shape.Size();
+    arr = NDArray(s, Context());
+    arr = arr.AsArray(shape, arr.dtype());
+    InitMKLDNNArray(&arr, pd, true);
+    in_arrs.emplace_back(arr);
+  }
+  return in_arrs;
+}
+
+using VerifyFunc = std::function<void (const NDArray &in_arr, const NDArray &arr)>;
+
+void VerifyCopyResult(const NDArray &in_arr, const NDArray &arr) {
+  NDArray tmp1 = in_arr.Reorder2Default();
+  NDArray tmp2 = arr.Reorder2Default();
+  EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size());
+  TBlob d1 = tmp1.data();
+  TBlob d2 = tmp2.data();
+  EXPECT_EQ(memcmp(d1.dptr_, d2.dptr_,
+                   tmp1.shape().Size() * sizeof(mshadow::default_real_t)), 0);
+}
+
+void TestUnaryOp(const OpAttrs &attrs, VerifyFunc verify_fn) {
+  std::vector<NDArray*> inputs(1);
+  std::vector<NDArray*> outputs(1);
+  std::vector<OpReqType> req(1);
+  std::vector<DispatchMode> dispatches = attrs.dispatches;
+
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+  std::vector<NDArray> in_arrs = GetTestInputArrays();
+  for (auto in_arr : in_arrs) {
+    for (auto dispatch : dispatches) {
+      std::vector<NDArray> out_arrs = GetTestOutputArrays(in_arr.shape(), pds);
+      for (auto out_arr : out_arrs) {
+        req[0] = kWriteTo;
+        inputs[0] = &in_arr;
+        outputs[0] = &out_arr;
+        Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs,
+                                    outputs, req, dispatch, mxnet::OpStatePtr());
+        out_arr.WaitToRead();
+        verify_fn(in_arr, out_arr);
+      }
+    }
+  }
+
+  for (auto dispatch : dispatches) {
+    in_arrs = GetTestInputArrays();
+    for (auto arr : in_arrs) {
+      // If the array is a view, we shouldn't write data to it.
+      if (arr.IsView())
+        continue;
+
+      NDArray orig = arr.Copy(arr.ctx());
+      req[0] = kWriteInplace;
+      inputs[0] = &arr;
+      outputs[0] = &arr;
+      Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, outputs, req,
+                                  dispatch, mxnet::OpStatePtr());
+      arr.WaitToRead();
+      verify_fn(orig, arr);
+    }
+  }
+}
+
+TEST(IMPERATIVE, UnaryOp) {
+  OpAttrs attrs = GetCopyOp();
+  TestUnaryOp(attrs, VerifyCopyResult);
+}
+
 #endif

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.