tvm-commits mailing list archives

From GitBox <...@apache.org>
Subject [GitHub] [incubator-tvm] wrongtest commented on a change in pull request #4459: [RUNTIME] Implement TVMDSOOp(TensorFlow custom op) for TVM runtime
Date Mon, 30 Mar 2020 03:46:30 GMT
wrongtest commented on a change in pull request #4459: [RUNTIME] Implement TVMDSOOp(TensorFlow custom op) for TVM runtime
URL: https://github.com/apache/incubator-tvm/pull/4459#discussion_r399912342
 
 

 ##########
 File path: src/contrib/tf_op/tvm_dso_op_kernels.cc
 ##########
 @@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifdef TF_TVMDSOOP_ENABLE_GPU
+#include <cuda_runtime.h>
+#endif
+#include <dlpack/dlpack.h>
+
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/device_api.h>
+#include <tvm/runtime/registry.h>
+#include <tvm/runtime/packed_func.h>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "index_seq.h"
+
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+typedef tensorflow::gtl::InlinedVector<tensorflow::int64, 4> ShapeContainer;
+
+using tensorflow::OpKernel;
+using tensorflow::OpKernelConstruction;
+using tensorflow::OpKernelContext;
+
+
+// Op utility trait template for different device types
+template <typename DEVICE_TYPE>
+class TVMDSOOpTrait;
+
+
+// Buffer information used for the actual computation.
+// Each buffer is associated with one TensorFlow tensor
+// whose underlying buffer is recorded in "origin_buf".
+// For an input tensor, we copy data from origin_buf to buf,
+// and for an output tensor, we copy data from buf to origin_buf.
+class TensorAsBuf {
+ public:
+    tensorflow::Tensor inline_tensor;
+    tensorflow::Tensor* tensor;
+
+    size_t size;
+    size_t offset;
+
+    int device_type;
+
+    char* origin_buf;
+    char* buf;
+
+    void CopyToOrigin() {
+        if (buf == origin_buf) {
+            return;
+        }
+        if (device_type == kDLCPU) {
+            memcpy(origin_buf, buf + offset, size);
+#ifdef TF_TVMDSOOP_ENABLE_GPU
+        } else if (device_type == kDLGPU) {
+            cudaMemcpy(origin_buf, buf + offset,
+                size, cudaMemcpyDeviceToDevice);
+#endif
+        } else {
+            LOG(FATAL) << "Only CPU and CUDA are supported now. Device "
+                << device_type << " is not implemented yet";
+        }
+    }
+
+    void CopyFromOrigin() {
+        if (buf == origin_buf) {
+            return;
+        }
+        if (device_type == kDLCPU) {
+            memcpy(buf + offset, origin_buf, size);
+#ifdef TF_TVMDSOOP_ENABLE_GPU
+        } else if (device_type == kDLGPU) {
+            cudaMemcpy(buf + offset, origin_buf,
+                size, cudaMemcpyDeviceToDevice);
+#endif
+        } else {
+            LOG(FATAL) << "Only CPU and CUDA are supported now. Device "
+                << device_type << " is not implemented yet";
+        }
+    }
+};
+
+
+tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor,
+                                  DLDataType* res) {
+    auto dtype = tf_tensor.dtype();
+    if (dtype == tensorflow::DT_FLOAT) {
+      *res = {kDLFloat, 32, 1};
+    } else if (dtype == tensorflow::DT_INT64) {
+      *res = {kDLInt, 64, 1};
+    } else if (dtype == tensorflow::DT_INT32) {
+      *res = {kDLInt, 32, 1};
+    } else {
+      return tensorflow::Status(tensorflow::error::INTERNAL,
+          "Fail to get dlpack datatype");
+    }
+    return tensorflow::Status::OK();
+}
+
+
+// Ensure the buffer used for actual computation takes 64-byte alignment
 
 Review comment:
   We refer to https://github.com/apache/incubator-tvm/blob/master/src/runtime/ndarray.cc#L57-L61;
the APIs that create an NDArray (such as NDArray::Empty) require at least 64-byte allocation
alignment, and currently we do not support TensorFlow types larger than that. So we just use
alignment=64 when we fit TVM NDArray arguments into PackedFunc.
   
   By the way, the TensorFlow memory allocator also seems to use 64 bytes as its default
alignment, so the additional memcpy here may not actually happen.
   
   I think the 64-byte (512-bit) alignment is to ensure AVX-512 can be used?
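   
   A minimal sketch of the alignment handling discussed here (the helper name and constant
are assumptions for illustration, not code from this PR): compute how far a TensorFlow
buffer is from 64-byte alignment, so the copy in TensorAsBuf can be skipped when the
offset is zero.
   
   ```cpp
   #include <cstddef>
   #include <cstdint>
   
   // Assumed constant mirroring the 64-byte alignment required by
   // NDArray::Empty in src/runtime/ndarray.cc.
   constexpr size_t kTVMAllocAlignment = 64;
   
   // Hypothetical helper: the extra offset needed so that
   // (base + offset) is 64-byte aligned.
   inline size_t AlignmentOffset(const char* base) {
     uintptr_t addr = reinterpret_cast<uintptr_t>(base);
     size_t rem = addr % kTVMAllocAlignment;
     return rem == 0 ? 0 : kTVMAllocAlignment - rem;
   }
   ```
   
   When AlignmentOffset(origin_buf) == 0 (the common case, since the TensorFlow allocator
defaults to 64-byte alignment), buf can simply alias origin_buf and no memcpy happens in
CopyToOrigin/CopyFromOrigin; otherwise a staging buffer of size + kTVMAllocAlignment is
allocated and buf + offset is the 64-byte-aligned address handed to TVM.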

