initial cuda run

49ceb0fe · Chao Liu · 2f2cf35b · 49ceb0fe · 49ceb0fe
Commit 49ceb0fe authored Oct 22, 2018 by Chao Liu
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 15 additions and 17 deletions

src/include/device_tensor.cuh +12 -14

src/include/direct_convolution.cuh +3 -3

No files found.
--- a/src/include/device_tensor.cuh
+++ b/src/include/device_tensor.cuh
@@ -14,26 +14,24 @@ struct DeviceTensorDescriptor
        checkCudaErrors(cudaMalloc(&mpLengths, data_sz * mDim));
        checkCudaErrors(cudaMalloc(&mpStrides, data_sz * mDim));

-        checkCudaErrors(
-            cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetLengths().data())),
-                       mpLengths,
-                       data_sz * mDim,
-                       cudaMemcpyHostToDevice));
-        checkCudaErrors(
-            cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetStrides().data())),
-                       mpStrides,
-                       data_sz * mDim,
-                       cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(
+            mpLengths, host_desc.GetLengths().data(), data_sz * mDim, cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(
+            mpStrides, host_desc.GetStrides().data(), data_sz * mDim, cudaMemcpyHostToDevice));
    }

    __host__ ~DeviceTensorDescriptor()
    {
-        checkCudaErrors(cudaFree(mpLengths));
-        checkCudaErrors(cudaFree(mpStrides));
+#if 0
+        if(mpLengths != nullptr)
+            checkCudaErrors(cudaFree(mpLengths));
+        if(mpStrides != nullptr)
+            checkCudaErrors(cudaFree(mpStrides));
+#endif
    }

    DataType_t mDataType;
    unsigned long mDim;
-    unsigned long* mpLengths;
-    unsigned long* mpStrides;
+    unsigned long* mpLengths = nullptr;
+    unsigned long* mpStrides = nullptr;
 };
--- a/src/include/direct_convolution.cuh
+++ b/src/include/direct_convolution.cuh
@@ -3,10 +3,10 @@

 template <class TFloat, int NBlockDim>
 __global__ void direct_convolution(DeviceTensorDescriptor in_desc,
-                                   TFloat* const in,
+                                   TFloat* const p_in,
                                   DeviceTensorDescriptor wei_desc,
-                                   TFloat* const wei,
+                                   TFloat* const p_wei,
                                   DeviceTensorDescriptor out_desc,
-                                   TFloat* out)
+                                   TFloat* p_out)
 {
 }