Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4dd82801
Commit
4dd82801
authored
Mar 22, 2023
by
Adam Osewski
Browse files
Small refinements.
parent
fe96e8fb
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
5 deletions
+15
-5
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
...ion/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
...tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
+3
-3
profiler/include/profiler/profile_grouped_gemm_impl.hpp
profiler/include/profiler/profile_grouped_gemm_impl.hpp
+11
-1
No files found.
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
View file @
4dd82801
...
...
@@ -744,7 +744,7 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
<<
K1
<<
", "
<<
M1PerThread
<<
", "
<<
N1PerThread
<<
", "
<<
KPerThread
<<
KPerThread
<<
", "
<<
getGemmSpecializationString
(
GemmSpec
)
<<
">"
;
// clang-format on
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp
View file @
4dd82801
...
...
@@ -315,7 +315,7 @@ struct GridwiseGemmDlMultipleD_km_kn_mn
static_assert
(
a_block_desc_k0_m0_m1_k1
.
GetElementSpaceSize
()
==
a_k0_m_k1_block_desc
.
GetElementSpaceSize
()
&&
b_block_desc_k0_n0_n1_k1
.
GetElementSpaceSize
()
==
b_k0_n_k1_block_desc
.
GetElementSpaceSize
()
&&
b_k0_n_k1_block_desc
.
GetElementSpaceSize
()
,
"wrong!"
);
// A matrix blockwise copy
...
...
profiler/include/profiler/profile_grouped_gemm_impl.hpp
View file @
4dd82801
...
...
@@ -253,7 +253,12 @@ bool profile_grouped_gemm_impl(int do_verification,
c_element_op
);
ref_invoker
.
Run
(
ref_argument
);
pass
=
pass
&&
ck
::
utils
::
check_err
(
c_m_n_device_results
[
i
],
c_m_n_host_result
);
bool
group_pass
=
ck
::
utils
::
check_err
(
c_m_n_device_results
[
i
],
c_m_n_host_result
);
pass
=
pass
&&
group_pass
;
std
::
cout
<<
"group: "
<<
i
<<
" verification result: "
<<
std
::
boolalpha
<<
group_pass
<<
std
::
endl
;
if
(
do_log
)
{
...
...
@@ -276,6 +281,11 @@ bool profile_grouped_gemm_impl(int do_verification,
}
}
if
(
do_verification
)
{
std
::
cout
<<
"Verification: "
<<
(
pass
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
std
::
cout
<<
"Best Perf: "
<<
best_ave_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_gemm_name
<<
std
::
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment