Commit a21b0d27 authored by Chao Liu
Browse files

refactor

parent 6790b8f3
......@@ -178,7 +178,7 @@ int main()
for(int i = 0; i < 20; ++i)
{
device_direct_convolution_2(in_desc, in, wei_desc, wei, out_desc, out_device);
device_direct_convolution_1(in_desc, in, wei_desc, wei, out_desc, out_device);
}
#if 0
......
......@@ -55,7 +55,7 @@ void device_direct_convolution_1(
cudaEventCreate(&start);
cudaEventRecord(start, 0);
gridwise_convolution<T,
gridwise_direct_convolution_1<T,
InDesc,
WeiDesc,
OutDesc,
......
......@@ -59,7 +59,7 @@ void device_direct_convolution_2(
cudaEventCreate(&start);
cudaEventRecord(start, 0);
gridwise_convolution<T,
gridwise_direct_convolution_2<T,
InDesc,
WeiDesc,
OutDesc,
......
......@@ -20,7 +20,7 @@ template <class TFloat,
unsigned NBlockOpLen3,
unsigned BlockSize,
unsigned GridSize>
__global__ void gridwise_convolution(InGlobalDesc,
__global__ void gridwise_direct_convolution_1(InGlobalDesc,
TFloat* const __restrict__ p_in_global,
WeiGlobalDesc,
TFloat* const __restrict__ p_wei_global,
......
......@@ -25,7 +25,7 @@ template <class TFloat,
unsigned NBlockOpLen3,
unsigned BlockSize,
unsigned GridSize>
__global__ void gridwise_convolution(InGlobalDesc,
__global__ void gridwise_direct_convolution_2(InGlobalDesc,
TFloat* const __restrict__ p_in_global,
WeiGlobalDesc,
TFloat* const __restrict__ p_wei_global,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment