Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
20423a35
Commit
20423a35
authored
Dec 11, 2018
by
Chao Liu
Browse files
tune direct
parent
1eafc9c1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
78 additions
and
46 deletions
+78
-46
driver/conv.cu
driver/conv.cu
+24
-40
driver/device_direct_convolution_1.cuh
driver/device_direct_convolution_1.cuh
+20
-3
driver/device_direct_convolution_2.cuh
driver/device_direct_convolution_2.cuh
+34
-3
No files found.
driver/conv.cu
View file @
20423a35
...
@@ -325,57 +325,41 @@ int main()
...
@@ -325,57 +325,41 @@ int main()
#if 0
#if 0
constexpr unsigned N = 1;
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned C = 1;
constexpr unsigned HI =
3
4;
constexpr unsigned HI = 4;
constexpr unsigned WI =
3
4;
constexpr unsigned WI = 4;
constexpr unsigned K = 1;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
constexpr unsigned R = 3;
#elif
1
#elif
0
constexpr
unsigned
N
=
64
;
constexpr
unsigned
N
=
1
;
constexpr
unsigned
C
=
256
;
constexpr
unsigned
C
=
1
;
constexpr
unsigned
HI
=
34
;
constexpr
unsigned
HI
=
34
;
constexpr
unsigned
WI
=
34
;
constexpr
unsigned
WI
=
34
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
K
=
1
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
#elif 0
constexpr
unsigned
N
=
72
;
constexpr
unsigned
N
=
64
;
constexpr
unsigned
C
=
2
88
;
constexpr
unsigned
C
=
2
56
;
constexpr
unsigned
HI
=
3
8
;
constexpr
unsigned
HI
=
3
4
;
constexpr
unsigned
WI
=
3
8
;
constexpr
unsigned
WI
=
3
4
;
constexpr
unsigned
K
=
72
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
R
=
3
;
#elif
0
#elif
1
constexpr
unsigned
N
=
1
;
constexpr
unsigned
N
=
64
;
constexpr
unsigned
C
=
1
;
constexpr
unsigned
C
=
64
;
constexpr
unsigned
HI
=
18
;
constexpr
unsigned
HI
=
56
;
constexpr
unsigned
WI
=
18
;
constexpr
unsigned
WI
=
56
;
constexpr
unsigned
K
=
1
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
#elif 0
constexpr
unsigned
N
=
1
;
constexpr
unsigned
N
=
64
;
constexpr
unsigned
C
=
1
;
constexpr
unsigned
C
=
64
;
constexpr
unsigned
HI
=
4
;
constexpr
unsigned
HI
=
66
;
constexpr
unsigned
WI
=
4
;
constexpr
unsigned
WI
=
66
;
constexpr
unsigned
K
=
1
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
constexpr
unsigned
N
=
2
;
constexpr
unsigned
C
=
3
;
constexpr
unsigned
HI
=
130
;
constexpr
unsigned
WI
=
130
;
constexpr
unsigned
K
=
5
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
constexpr
unsigned
N
=
3
;
constexpr
unsigned
C
=
16
;
constexpr
unsigned
HI
=
130
;
constexpr
unsigned
WI
=
130
;
constexpr
unsigned
K
=
4
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
R
=
3
;
#endif
#endif
...
@@ -397,7 +381,7 @@ int main()
...
@@ -397,7 +381,7 @@ int main()
std::size_t num_thread = std::thread::hardware_concurrency();
std::size_t num_thread = std::thread::hardware_concurrency();
in.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
in.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif
0
#elif
1
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
std
::
size_t
num_thread
=
std
::
thread
::
hardware_concurrency
();
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
wei
.
GenerateTensorValue
(
GeneratorTensor_2
{
-
5
,
5
},
num_thread
);
...
@@ -412,7 +396,7 @@ int main()
...
@@ -412,7 +396,7 @@ int main()
#endif
#endif
}
}
#if
0
#if
1
host_winograd_3x3_convolution
(
in
,
wei
,
out_host
);
host_winograd_3x3_convolution
(
in
,
wei
,
out_host
);
check_error
(
out_host
,
out_device
);
check_error
(
out_host
,
out_device
);
#elif 0
#elif 0
...
...
driver/device_direct_convolution_1.cuh
View file @
20423a35
...
@@ -21,9 +21,11 @@ void device_direct_convolution_1(
...
@@ -21,9 +21,11 @@ void device_direct_convolution_1(
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
in_desc
=
InDesc
{};
constexpr
auto
in_desc
=
InDesc
{};
constexpr
auto
wei_desc
=
WeiDesc
{};
constexpr
auto
wei_desc
=
WeiDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
#if 0
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned NPerBlock = 2;
...
@@ -37,6 +39,21 @@ void device_direct_convolution_1(
...
@@ -37,6 +39,21 @@ void device_direct_convolution_1(
constexpr unsigned CPerThread = 2;
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 128;
constexpr unsigned BlockSize = 128;
#elif
1
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
NPerBlock
=
2
;
constexpr
unsigned
KPerBlock
=
16
;
constexpr
unsigned
CPerBlock
=
2
;
constexpr
unsigned
YPerBlock
=
2
;
constexpr
unsigned
XPerBlock
=
27
;
constexpr
unsigned
NPerThread
=
2
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
CPerThread
=
2
;
constexpr
unsigned
BlockSize
=
216
;
#endif
constexpr
unsigned
GridSize
=
(
out_desc
.
GetLength
(
I0
)
/
NPerBlock
)
*
constexpr
unsigned
GridSize
=
(
out_desc
.
GetLength
(
I0
)
/
NPerBlock
)
*
(
out_desc
.
GetLength
(
I1
)
/
KPerBlock
)
*
(
out_desc
.
GetLength
(
I1
)
/
KPerBlock
)
*
...
...
driver/device_direct_convolution_2.cuh
View file @
20423a35
...
@@ -21,9 +21,11 @@ void device_direct_convolution_2(
...
@@ -21,9 +21,11 @@ void device_direct_convolution_2(
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
in_desc
=
InDesc
{};
constexpr
auto
in_desc
=
InDesc
{};
constexpr
auto
wei_desc
=
WeiDesc
{};
constexpr
auto
wei_desc
=
WeiDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
#if 0
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned NPerBlock = 2;
...
@@ -37,6 +39,35 @@ void device_direct_convolution_2(
...
@@ -37,6 +39,35 @@ void device_direct_convolution_2(
constexpr unsigned CPerThread = 2;
constexpr unsigned CPerThread = 2;
constexpr unsigned BlockSize = 128;
constexpr unsigned BlockSize = 128;
#elif
1
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
NPerBlock
=
2
;
constexpr
unsigned
KPerBlock
=
32
;
constexpr
unsigned
CPerBlock
=
4
;
constexpr
unsigned
YPerBlock
=
1
;
constexpr
unsigned
XPerBlock
=
27
;
constexpr
unsigned
NPerThread
=
2
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
CPerThread
=
2
;
constexpr
unsigned
BlockSize
=
216
;
#elif 1
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
NPerBlock
=
2
;
constexpr
unsigned
KPerBlock
=
32
;
constexpr
unsigned
CPerBlock
=
4
;
constexpr
unsigned
YPerBlock
=
1
;
constexpr
unsigned
XPerBlock
=
32
;
constexpr
unsigned
NPerThread
=
2
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
CPerThread
=
2
;
constexpr
unsigned
BlockSize
=
256
;
#endif
constexpr
unsigned
GridSize
=
(
out_desc
.
GetLength
(
I0
)
/
NPerBlock
)
*
constexpr
unsigned
GridSize
=
(
out_desc
.
GetLength
(
I0
)
/
NPerBlock
)
*
(
out_desc
.
GetLength
(
I1
)
/
KPerBlock
)
*
(
out_desc
.
GetLength
(
I1
)
/
KPerBlock
)
*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment