Commit a21b0d27 authored by Chao Liu
Browse files

refactor

parent 6790b8f3
...@@ -178,7 +178,7 @@ int main() ...@@ -178,7 +178,7 @@ int main()
for(int i = 0; i < 20; ++i) for(int i = 0; i < 20; ++i)
{ {
device_direct_convolution_2(in_desc, in, wei_desc, wei, out_desc, out_device); device_direct_convolution_1(in_desc, in, wei_desc, wei, out_desc, out_device);
} }
#if 0 #if 0
......
...@@ -55,7 +55,7 @@ void device_direct_convolution_1( ...@@ -55,7 +55,7 @@ void device_direct_convolution_1(
cudaEventCreate(&start); cudaEventCreate(&start);
cudaEventRecord(start, 0); cudaEventRecord(start, 0);
gridwise_convolution<T, gridwise_direct_convolution_1<T,
InDesc, InDesc,
WeiDesc, WeiDesc,
OutDesc, OutDesc,
......
...@@ -59,7 +59,7 @@ void device_direct_convolution_2( ...@@ -59,7 +59,7 @@ void device_direct_convolution_2(
cudaEventCreate(&start); cudaEventCreate(&start);
cudaEventRecord(start, 0); cudaEventRecord(start, 0);
gridwise_convolution<T, gridwise_direct_convolution_2<T,
InDesc, InDesc,
WeiDesc, WeiDesc,
OutDesc, OutDesc,
......
...@@ -20,7 +20,7 @@ template <class TFloat, ...@@ -20,7 +20,7 @@ template <class TFloat,
unsigned NBlockOpLen3, unsigned NBlockOpLen3,
unsigned BlockSize, unsigned BlockSize,
unsigned GridSize> unsigned GridSize>
__global__ void gridwise_convolution(InGlobalDesc, __global__ void gridwise_direct_convolution_1(InGlobalDesc,
TFloat* const __restrict__ p_in_global, TFloat* const __restrict__ p_in_global,
WeiGlobalDesc, WeiGlobalDesc,
TFloat* const __restrict__ p_wei_global, TFloat* const __restrict__ p_wei_global,
......
...@@ -25,7 +25,7 @@ template <class TFloat, ...@@ -25,7 +25,7 @@ template <class TFloat,
unsigned NBlockOpLen3, unsigned NBlockOpLen3,
unsigned BlockSize, unsigned BlockSize,
unsigned GridSize> unsigned GridSize>
__global__ void gridwise_convolution(InGlobalDesc, __global__ void gridwise_direct_convolution_2(InGlobalDesc,
TFloat* const __restrict__ p_in_global, TFloat* const __restrict__ p_in_global,
WeiGlobalDesc, WeiGlobalDesc,
TFloat* const __restrict__ p_wei_global, TFloat* const __restrict__ p_wei_global,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment