mxnet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] eric-haibin-lin commented on a change in pull request #10371: [MXNET-263] [WIP] Support for dot(dns, csr) = dns and dot(dns, csr.T) = dns on GPU
Date Tue, 03 Apr 2018 21:29:12 GMT
eric-haibin-lin commented on a change in pull request #10371: [MXNET-263] [WIP] Support for
dot(dns, csr) = dns and dot(dns, csr.T) = dns on GPU
URL: https://github.com/apache/incubator-mxnet/pull/10371#discussion_r178965893
 
 

 ##########
 File path: src/operator/tensor/dot.cu
 ##########
 @@ -23,10 +23,142 @@
  */
 
 #include "./dot-inl.h"
+#include <cub/cub.cuh>
 
 namespace mxnet {
 namespace op {
 
+template<typename gpu>
+inline void DotDnsCsrCsrImpl(const OpContext& ctx,
+                             const TBlob& lhs, const NDArray& rhs,
+                             const OpReqType req, NDArray* ret) {
+  LOG(FATAL) << "dot(dense, csr) = csr is not implemented on GPU";
+}
+
+/*
+ * \brief GPU Impl of dot(dns, csr) = dns and dot(dns, csr.T) = dns
+ */
+template<typename gpu>
+inline void DotDnsCsrDnsImpl(const OpContext& ctx,
+                             const TBlob& dns, const NDArray& rhs,
+                             const OpReqType req, NDArray* ret,
+                             const bool transpose_b) {
+  CHECK_EQ(req, kWriteTo);
+  CHECK_EQ(rhs.storage_type(), kCSRStorage);
+
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  using nnvm::dim_t;
+
+  /* Initialize data structures */
+  mshadow::Stream<gpu>* s = ctx.get_stream<gpu>();
+  TBlob csr_data = rhs.data();
+  TBlob csr_indices = rhs.aux_data(csr::kIdx);
+  TBlob csr_indptr = rhs.aux_data(csr::kIndPtr);
+  if (!rhs.storage_initialized()) {
+    FillZerosCsrImpl(s, *ret);
+    return;
+  }
+
+  // if dot(dense, csr) = dns, transform to csc first
+  if (!transpose_b) {
+    LOG(FATAL) << "dot(dns, csr) = dns not implemented yet";
+    const nnvm::dim_t csr_rows = rhs.shape()[0];
+    const nnvm::dim_t csr_cols = rhs.shape()[1];
+    const nnvm::dim_t nnz = rhs.storage_shape().Size();
+
+    MSHADOW_SGL_DBL_TYPE_SWITCH(csr_data.type_flag_, DType, {
+      MSHADOW_IDX_TYPE_SWITCH(csr_indices.type_flag_, IType, {
+        MSHADOW_IDX_TYPE_SWITCH(csr_indptr.type_flag_, CType, {
+          mshadow::Stream<cpu>* cpu_s = ctx.get_stream<cpu>();
+          Tensor<gpu, 1, DType> csc_data =
+            ctx.requested[0].get_space_typed<gpu, 1, DType>(
+              Shape1(nnz), s);
+          Tensor<gpu, 1, IType> csc_indices =
+            ctx.requested[0].get_space_typed<gpu, 1, IType>(
+              Shape1(nnz), s);
+          Tensor<gpu, 1, CType> csc_indptr =
+            ctx.requested[0].get_space_typed<gpu, 1, CType>(
+              Shape1(csr_cols + 1), s);
+          DType data_buf[nnz] = {0};
+          Tensor<cpu, 1, DType> csc_data_c =
+            ctx.requested[0].get_space_typed<cpu, 1, DType>(
+              Shape1(nnz), cpu_s);
+          /* Tensor<cpu, 1, IType> csc_indices_c = */
+          /*   ctx.requested[0].get_space_typed<cpu, 1, IType>( */
+          /*     Shape1(nnz), cpu_s); */
+          /* Tensor<cpu, 1, CType> csc_indptr_c = */
+          /*   ctx.requested[0].get_space_typed<cpu, 1, CType>( */
+          /*     Shape1(csr_cols + 1), cpu_s); */
+          // reset values for indptr, ready for histogramming
+          mxnet_op::Kernel<mxnet_op::set_zero, gpu>::Launch(
+            s, csr_cols + 1, csc_indptr.dptr_);
+          // histogramming on col id
+          mxnet_op::Kernel<CsrTransHistogramKernel, gpu>::Launch(
+            s, nnz, csr_indices.dptr<IType>(),
+            csc_indptr.dptr_, nnz);
+          size_t temp_storage_bytes = 0;
+          // Get necessary temporary storage amount
+          cub::DeviceScan::ExclusiveSum(nullptr,
+                                        temp_storage_bytes,
+                                        csc_indptr.dptr_,
+                                        csc_indptr.dptr_,
+                                        csr_cols+1,
+                                        Stream<gpu>::GetStream(s));
+          LOG(INFO) << "temp storage bytes: " << temp_storage_bytes;
+          Tensor<gpu, 1, CType> workspace =
+            ctx.requested[0].get_space_typed<gpu, 1, CType>(
+              Shape1(temp_storage_bytes), s);
+          cub::DeviceScan::ExclusiveSum(workspace.dptr_,
+                                        temp_storage_bytes,
+                                        csc_indptr.dptr_,
+                                        csc_indptr.dptr_,
+                                        csr_cols+1,
+                                        Stream<gpu>::GetStream(s));
+          Tensor<gpu, 1, int> col_counters =
+            ctx.requested[0].get_space_typed<gpu, 1, int>(Shape1(csr_cols+1), s);
+          // reset values for indptr, ready for histogramming
+          mxnet_op::Kernel<mxnet_op::set_zero, gpu>::Launch(
+            s, csr_cols+1, col_counters.dptr_);
+          mxnet_op::Kernel<CscDataIndicesKernel, gpu>::Launch(
+            s, csr_rows, csr_data.dptr<DType>(), csr_indices.dptr<IType>(),
+            csr_indptr.dptr<CType>(), csc_data.dptr_, csc_indices.dptr_,
+            csc_indptr.dptr_, col_counters.dptr_, csr_rows, csr_cols);
+
+          cudaMemcpy(data_buf, csc_data.dptr_, nnz * sizeof(DType), cudaMemcpyDeviceToHost);
+          for (int i = 0; i < nnz; ++i) {
+            LOG(INFO) << data_buf[i] << " ";
+          }
+          LOG(INFO) << "\n";
+          CType out_num_rows = ret->shape()[0];
+          CType out_num_cols = ret->shape()[1];
+          mxnet_op::Kernel<DotDnsCsrTransDnsKernel, gpu>::Launch(
+            s, out_num_rows * out_num_cols, dns.dptr<DType>(),
+            csc_data.dptr_, csc_indices.dptr_, csc_indptr.dptr_,
+            ret->data().dptr<DType>(), dns.shape_[1],
+            out_num_rows, out_num_cols);
+        });
+      });
+    });
+  } else {
+    MSHADOW_SGL_DBL_TYPE_SWITCH(csr_data.type_flag_, DType, {     // data type
+      MSHADOW_IDX_TYPE_SWITCH(csr_indices.type_flag_, IType, {     // indptr type
+        MSHADOW_IDX_TYPE_SWITCH(csr_indptr.type_flag_, CType, {  // colidx type
+          /* Allocate workspace */
 
 Review comment:
   Remove this comment?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message