Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
8986060e
Commit
8986060e
authored
Jun 23, 2023
by
Adam Osewski
Browse files
Warp raked and thread raked access pattern.
parent
70b6d031
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
721 additions
and
5 deletions
+721
-5
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_direct_c_write_out.hpp
...on/gpu/device/impl/device_gemm_xdl_direct_c_write_out.hpp
+18
-5
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_thread_raked.hpp
...ise_gemm_xdl_direct_c_write_out_roofline_thread_raked.hpp
+703
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_warp_raked.hpp
...dwise_gemm_xdl_direct_c_write_out_roofline_warp_raked.hpp
+0
-0
No files found.
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_direct_c_write_out.hpp
View file @
8986060e
...
@@ -6,13 +6,16 @@
...
@@ -6,13 +6,16 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "ck/host_utility/io.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_warp_raked.hpp"
// #include
// "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_thread_raked.hpp"
// #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out.hpp"
// #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/kernel_launch.hpp"
...
@@ -656,7 +659,7 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
...
@@ -656,7 +659,7 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
// polymorphic
// polymorphic
std
::
string
GetTypeString
()
const
override
std
::
string
GetTypeString
()
const
override
{
{
auto
str
=
std
::
stringstream
()
;
auto
str
=
std
::
stringstream
{}
;
std
::
map
<
LoopScheduler
,
std
::
string
>
LoopSchedToString
{
std
::
map
<
LoopScheduler
,
std
::
string
>
LoopSchedToString
{
{
LoopScheduler
::
Default
,
"Default"
},
{
LoopScheduler
::
Interwave
,
"Interwave"
}};
{
LoopScheduler
::
Default
,
"Default"
},
{
LoopScheduler
::
Interwave
,
"Interwave"
}};
...
@@ -664,6 +667,13 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
...
@@ -664,6 +667,13 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
std
::
map
<
PipelineVersion
,
std
::
string
>
PipelineVersionToString
{{
PipelineVersion
::
v1
,
"v1"
},
std
::
map
<
PipelineVersion
,
std
::
string
>
PipelineVersionToString
{{
PipelineVersion
::
v1
,
"v1"
},
{
PipelineVersion
::
v2
,
"v2"
}};
{
PipelineVersion
::
v2
,
"v2"
}};
auto
c_thr_dst_access_order_str
=
std
::
ostringstream
{};
c_thr_dst_access_order_str
<<
"{"
;
ck
::
static_for
<
0
,
CThreadTransferDstAccessOrder
::
Size
(),
1
>
{}([
&
](
auto
i
)
{
c_thr_dst_access_order_str
<<
CThreadTransferDstAccessOrder
::
At
(
i
).
value
<<
", "
;
});
c_thr_dst_access_order_str
<<
"}"
;
// clang-format off
// clang-format off
str
<<
"DeviceGemm_Xdl_DirectCWriteOut"
str
<<
"DeviceGemm_Xdl_DirectCWriteOut"
<<
"<"
<<
"<"
...
@@ -672,12 +682,15 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
...
@@ -672,12 +682,15 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
<<
NPerBlock
<<
", "
<<
NPerBlock
<<
", "
<<
KPerBlock
<<
", "
<<
KPerBlock
<<
", "
<<
AK1
<<
", "
<<
AK1
<<
", "
<<
BK1
<<
BK1
<<
", "
<<
c_thr_dst_access_order_str
.
str
()
<<
", "
<<
CThreadTransferDstVectorDim
<<
", "
<<
CThreadTransferDstScalarPerVector
<<
", "
<<
">"
<<
">"
<<
" LoopScheduler: "
<<
" LoopScheduler: "
<<
LoopSchedToString
[
LoopSched
]
<<
"
,
"
<<
LoopSchedToString
[
LoopSched
]
<<
"
;
"
<<
"PipelineVersion: "
<<
"PipelineVersion: "
<<
PipelineVersionToString
[
PipelineVer
];
;
<<
PipelineVersionToString
[
PipelineVer
];
// clang-format on
// clang-format on
return
str
.
str
();
return
str
.
str
();
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_thread_raked.hpp
0 → 100644
View file @
8986060e
This diff is collapsed.
Click to expand it.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline.hpp
→
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline
_warp_raked
.hpp
View file @
8986060e
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment