Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
d9ce3a57
Commit
d9ce3a57
authored
Jul 05, 2023
by
Po-Yen, Chen
Browse files
Fix format
parent
b5f11e02
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
31 additions
and
48 deletions
+31
-48
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp
...emm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp
+1
-1
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp
...xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp
...xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp
...l_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp
...6_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp
...6_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp
...f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp
...emm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp
+1
-1
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp
...xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp
...xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp
...l_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp
...6_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp
...6_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp
...f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp
...emm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp
+1
-1
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp
...xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp
...xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp
...l_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp
+2
-3
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp
...6_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp
+1
-2
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp
...6_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp
+2
-3
No files found.
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp
View file @
d9ce3a57
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -29,8 +29,7 @@ using Instances =
...
@@ -29,8 +29,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_default_pipeline_v1_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -31,8 +31,7 @@ using Instances =
...
@@ -31,8 +31,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_default_pipeline_v2_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
// Compilation parameters for a[k, m] * b[k, n] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
// pipeline v1, 2 waves
// pipeline v1, 2 waves
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -31,8 +31,7 @@ using Instances =
...
@@ -31,8 +31,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_interwave_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_interwave_pipeline_v1_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_default_pipeline_v1_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -10,7 +10,7 @@ namespace instance {
...
@@ -10,7 +10,7 @@ namespace instance {
// irregular tile size
// irregular tile size
using
Instances
=
std
::
tuple
<
using
Instances
=
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_default_pipeline_v2_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -10,7 +10,7 @@ namespace instance {
...
@@ -10,7 +10,7 @@ namespace instance {
// irregular tile size
// irregular tile size
using
Instances
=
std
::
tuple
<
using
Instances
=
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
// pipeline v1, 2 waves
// pipeline v1, 2 waves
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_interwave_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_kn_mn_irregular_interwave_pipeline_v1_instances
(
OwnerList
<
InstanceNT
>&
OwnerList
<
InstanceNT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp
View file @
d9ce3a57
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -29,8 +29,7 @@ using Instances =
...
@@ -29,8 +29,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_default_pipeline_v1_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -31,8 +31,7 @@ using Instances =
...
@@ -31,8 +31,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_default_pipeline_v2_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
// Compilation parameters for a[k, m] * b[n, k] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
// pipeline v1, 2 waves
// pipeline v1, 2 waves
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -31,8 +31,7 @@ using Instances =
...
@@ -31,8 +31,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_interwave_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_interwave_pipeline_v1_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_default_pipeline_v1_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -10,7 +10,7 @@ namespace instance {
...
@@ -10,7 +10,7 @@ namespace instance {
// irregular tile size
// irregular tile size
using
Instances
=
std
::
tuple
<
using
Instances
=
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_default_pipeline_v2_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -10,7 +10,7 @@ namespace instance {
...
@@ -10,7 +10,7 @@ namespace instance {
// irregular tile size
// irregular tile size
using
Instances
=
std
::
tuple
<
using
Instances
=
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
// pipeline v1, 2 waves
// pipeline v1, 2 waves
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_interwave_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_km_nk_mn_irregular_interwave_pipeline_v1_instances
(
OwnerList
<
InstanceNN
>&
OwnerList
<
InstanceNN
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp
View file @
d9ce3a57
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -38,8 +38,7 @@ using Instances =
...
@@ -38,8 +38,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_default_pipeline_v1_instances
(
OwnerList
<
InstanceTT
>&
OwnerList
<
InstanceTT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -40,8 +40,7 @@ using Instances =
...
@@ -40,8 +40,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_default_pipeline_v2_instances
(
OwnerList
<
InstanceTT
>&
OwnerList
<
InstanceTT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -11,7 +11,7 @@ namespace instance {
...
@@ -11,7 +11,7 @@ namespace instance {
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
// Compilation parameters for a[m, k] * b[k, n] = c[m, n]
using
Instances
=
using
Instances
=
std
::
tuple
<
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
#if CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
// pipeline v1, 2 waves
// pipeline v1, 2 waves
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//##########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -40,8 +40,7 @@ using Instances =
...
@@ -40,8 +40,7 @@ using Instances =
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_interwave_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_interwave_pipeline_v1_instances
(
OwnerList
<
InstanceTT
>&
OwnerList
<
InstanceTT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp
View file @
d9ce3a57
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
...
@@ -21,8 +21,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_irregular_default_pipeline_v1_instances
(
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_irregular_default_pipeline_v1_instances
(
OwnerList
<
InstanceTT
>&
OwnerList
<
InstanceTT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp
View file @
d9ce3a57
...
@@ -10,7 +10,7 @@ namespace instance {
...
@@ -10,7 +10,7 @@ namespace instance {
// irregular tile size
// irregular tile size
using
Instances
=
std
::
tuple
<
using
Instances
=
std
::
tuple
<
// clang-format off
// clang-format off
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
#if CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
// pipeline v2, 1 wave
// pipeline v2, 1 wave
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
//###########| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| NumPrefetch| LoopScheduler| Pipeline|
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
...
@@ -23,8 +23,7 @@ using Instances = std::tuple<
>
;
>
;
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_irregular_default_pipeline_v2_instances
(
void
add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_irregular_default_pipeline_v2_instances
(
OwnerList
<
InstanceTT
>&
OwnerList
<
InstanceTT
>&
instances
)
instances
)
{
{
add_device_operation_instances
(
instances
,
Instances
{});
add_device_operation_instances
(
instances
,
Instances
{});
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment