Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
e9ac4855
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "e23c27e90524c3c3e93525d03f18408b84372012"
Commit
e9ac4855
authored
Jan 21, 2019
by
Chao Liu
Browse files
tune
parent
b5b4fd28
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
18 additions
and
15 deletions
+18
-15
driver/conv.cu
driver/conv.cu
+1
-1
driver/device_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
driver/device_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
+9
-9
src/include/gemm.cuh
src/include/gemm.cuh
+4
-4
src/include/gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
...e/gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
+4
-1
No files found.
driver/conv.cu
View file @
e9ac4855
...
@@ -357,7 +357,7 @@ int main()
...
@@ -357,7 +357,7 @@ int main()
constexpr unsigned C = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 34;
constexpr unsigned HI = 34;
constexpr unsigned WI = 34;
constexpr unsigned WI = 34;
constexpr unsigned K =
4
;
constexpr unsigned K =
1
;
constexpr unsigned S = 3;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
constexpr unsigned R = 3;
#elif
1
#elif
1
...
...
driver/device_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
View file @
e9ac4855
...
@@ -67,29 +67,29 @@ void device_implicit_gemm_convolution_2_cnhw_srck_knhw(InDesc,
...
@@ -67,29 +67,29 @@ void device_implicit_gemm_convolution_2_cnhw_srck_knhw(InDesc,
Tensor
<
T
>
out_knhw
(
make_TensorDescriptor
(
out_knhw_desc
));
Tensor
<
T
>
out_knhw
(
make_TensorDescriptor
(
out_knhw_desc
));
#if 0
#if 0
constexpr unsigned BPerBlock =
128
;
constexpr unsigned BPerBlock =
256
;
constexpr unsigned KPerBlock =
4
;
constexpr unsigned KPerBlock =
1
;
constexpr unsigned CPerBlock = 1;
constexpr unsigned CPerBlock = 1;
constexpr unsigned BPerThread =
4
;
constexpr unsigned BPerThread =
8
;
constexpr unsigned KPerThread = 1;
constexpr unsigned KPerThread = 1;
constexpr unsigned CPerThread = 1;
constexpr unsigned CPerThread = 1;
constexpr unsigned ThreadPerClusterRow =
4
;
constexpr unsigned ThreadPerClusterRow =
1
;
constexpr unsigned ThreadPerClusterColumn =
16
;
constexpr unsigned ThreadPerClusterColumn =
4
;
constexpr unsigned BlockSize =
128
;
constexpr unsigned BlockSize =
32
;
#elif
1
#elif
1
constexpr
unsigned
BPerBlock
=
128
;
constexpr
unsigned
BPerBlock
=
128
;
constexpr
unsigned
KPerBlock
=
64
;
constexpr
unsigned
KPerBlock
=
64
;
constexpr
unsigned
CPerBlock
=
2
;
constexpr
unsigned
CPerBlock
=
2
;
constexpr
unsigned
BPerThread
=
4
;
constexpr
unsigned
BPerThread
=
8
;
constexpr
unsigned
KPerThread
=
16
;
constexpr
unsigned
KPerThread
=
8
;
constexpr
unsigned
CPerThread
=
1
;
constexpr
unsigned
CPerThread
=
1
;
constexpr
unsigned
ThreadPerClusterRow
=
4
;
constexpr
unsigned
ThreadPerClusterRow
=
4
;
constexpr
unsigned
ThreadPerClusterColumn
=
16
;
constexpr
unsigned
ThreadPerClusterColumn
=
4
;
constexpr
unsigned
BlockSize
=
128
;
constexpr
unsigned
BlockSize
=
128
;
#endif
#endif
...
...
src/include/gemm.cuh
View file @
e9ac4855
...
@@ -388,9 +388,9 @@ struct blockwise_gemm_block_a_block_b_thread_c
...
@@ -388,9 +388,9 @@ struct blockwise_gemm_block_a_block_b_thread_c
const
unsigned
thread_work_cluster_id
=
const
unsigned
thread_work_cluster_id
=
thread_id
-
cluster_work_block_id
*
(
MThreadPerCluster
*
NThreadPerCluster
);
thread_id
-
cluster_work_block_id
*
(
MThreadPerCluster
*
NThreadPerCluster
);
const
unsigned
m_cluster_work_block_id
=
cluster_work_block_id
/
N
ThreadPer
Cluster
;
const
unsigned
m_cluster_work_block_id
=
cluster_work_block_id
/
NCluster
Work
;
const
unsigned
n_cluster_work_block_id
=
const
unsigned
n_cluster_work_block_id
=
cluster_work_block_id
-
m_cluster_work_block_id
*
N
ThreadPer
Cluster
;
cluster_work_block_id
-
m_cluster_work_block_id
*
NCluster
Work
;
const
unsigned
m_thread_work_cluster_id
=
const
unsigned
m_thread_work_cluster_id
=
thread_work_cluster_id
/
NThreadPerCluster
;
thread_work_cluster_id
/
NThreadPerCluster
;
...
@@ -401,12 +401,12 @@ struct blockwise_gemm_block_a_block_b_thread_c
...
@@ -401,12 +401,12 @@ struct blockwise_gemm_block_a_block_b_thread_c
if(get_block_1d_id() == 0)
if(get_block_1d_id() == 0)
{
{
printf("%u %u, \t"
printf("%u %u, \t"
//
"MClusterWork %u MThreadPerCluster %u NClusterWork %u NThreadPerCluster %u \t"
"MClusterWork %u MThreadPerCluster %u NClusterWork %u NThreadPerCluster %u \t"
"m_cluster_work_block_id %u n_cluster_work_block_id %u \t"
"m_cluster_work_block_id %u n_cluster_work_block_id %u \t"
"m_thread_work_cluster_id %u n_thread_work_cluster_id %u \t"
"m_thread_work_cluster_id %u n_thread_work_cluster_id %u \t"
"\n",
"\n",
get_block_1d_id(), get_thread_local_1d_id(),
get_block_1d_id(), get_thread_local_1d_id(),
//
MClusterWork, MThreadPerCluster, NClusterWork, NThreadPerCluster,
MClusterWork, MThreadPerCluster, NClusterWork, NThreadPerCluster,
m_cluster_work_block_id, n_cluster_work_block_id,
m_cluster_work_block_id, n_cluster_work_block_id,
m_thread_work_cluster_id, n_thread_work_cluster_id);
m_thread_work_cluster_id, n_thread_work_cluster_id);
}
}
...
...
src/include/gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw.cuh
View file @
e9ac4855
...
@@ -239,10 +239,13 @@ gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw(InGlobalDesc,
...
@@ -239,10 +239,13 @@ gridwise_implicit_gemm_convolution_2_cnhw_srck_knhw(InGlobalDesc,
p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)]);
p_out_thread[out_kb_thread_desc.Get1dIndex(k, b)]);
}
}
#endif
#endif
if
(
k_data
<
K
&&
n_data
<
N
&&
h_data
<
Ho
&&
w_data
<
Wo
)
if
(
n_data
<
N
&&
h_data
<
Ho
&&
w_data
<
Wo
)
{
{
#if 1
p_out_global
[
out_knhw_global_desc
.
Get1dIndex
(
k_data
,
n_data
,
h_data
,
w_data
)]
=
p_out_global
[
out_knhw_global_desc
.
Get1dIndex
(
k_data
,
n_data
,
h_data
,
w_data
)]
=
p_out_thread
[
out_kb_thread_desc
.
Get1dIndex
(
k
,
b
)];
p_out_thread
[
out_kb_thread_desc
.
Get1dIndex
(
k
,
b
)];
#endif
#if 0
#if 0
if(get_block_1d_id() == 0)
if(get_block_1d_id() == 0)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment