Commit 49ceb0fe authored by Chao Liu
Browse files

initial cuda run

parent 2f2cf35b
......@@ -14,26 +14,24 @@ struct DeviceTensorDescriptor
checkCudaErrors(cudaMalloc(&mpLengths, data_sz * mDim));
checkCudaErrors(cudaMalloc(&mpStrides, data_sz * mDim));
checkCudaErrors(
cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetLengths().data())),
mpLengths,
data_sz * mDim,
cudaMemcpyHostToDevice));
checkCudaErrors(
cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetStrides().data())),
mpStrides,
data_sz * mDim,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(
mpLengths, host_desc.GetLengths().data(), data_sz * mDim, cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(
mpStrides, host_desc.GetStrides().data(), data_sz * mDim, cudaMemcpyHostToDevice));
}
__host__ ~DeviceTensorDescriptor()
{
checkCudaErrors(cudaFree(mpLengths));
checkCudaErrors(cudaFree(mpStrides));
#if 0
if(mpLengths != nullptr)
checkCudaErrors(cudaFree(mpLengths));
if(mpStrides != nullptr)
checkCudaErrors(cudaFree(mpStrides));
#endif
}
DataType_t mDataType;
unsigned long mDim;
unsigned long* mpLengths;
unsigned long* mpStrides;
unsigned long* mpLengths = nullptr;
unsigned long* mpStrides = nullptr;
};
......@@ -3,10 +3,10 @@
template <class TFloat, int NBlockDim>
__global__ void direct_convolution(DeviceTensorDescriptor in_desc,
TFloat* const in,
TFloat* const p_in,
DeviceTensorDescriptor wei_desc,
TFloat* const wei,
TFloat* const p_wei,
DeviceTensorDescriptor out_desc,
TFloat* out)
TFloat* p_out)
{
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment