Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
28459058
Commit
28459058
authored
Aug 29, 2023
by
letaoqin
Browse files
add check code
parent
d10f25a0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
0 deletions
+26
-0
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
+13
-0
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
+13
-0
No files found.
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
View file @
28459058
...
@@ -1129,6 +1129,19 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V1
...
@@ -1129,6 +1129,19 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V1
return
false
;
return
false
;
}
}
if
constexpr
(
!
is_same
<
D0DataType
,
void
>::
value
)
{
if
(
device_arg
.
d0_n_length_stride_
[
1
]
==
1
&&
device_arg
.
d0_n_length_stride_
[
0
]
%
D0BlockTransferSrcScalarPerVector
!=
0
)
{
return
false
;
}
if
(
device_arg
.
d0_n_length_stride_
[
1
]
!=
1
&&
D0BlockTransferSrcScalarPerVector
!=
1
)
{
return
false
;
}
}
// Note: we need raw lengths since threadwise copy can not handle vector load when part
// Note: we need raw lengths since threadwise copy can not handle vector load when part
// of vector is out of bounds Note: need lowest dim in Ms/Ns/Ks/Os, not merged M/N/K/O
// of vector is out of bounds Note: need lowest dim in Ms/Ns/Ks/Os, not merged M/N/K/O
const
auto
MzRaw
=
device_arg
.
raw_lengths_mz_nz_kz_gemm1nz_
[
0
];
const
auto
MzRaw
=
device_arg
.
raw_lengths_mz_nz_kz_gemm1nz_
[
0
];
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
View file @
28459058
...
@@ -1135,6 +1135,19 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V2
...
@@ -1135,6 +1135,19 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V2
return
false
;
return
false
;
}
}
if
constexpr
(
!
is_same
<
D0DataType
,
void
>::
value
)
{
if
(
device_arg
.
d0_n_length_stride_
[
1
]
==
1
&&
device_arg
.
d0_n_length_stride_
[
0
]
%
D0BlockTransferSrcScalarPerVector
!=
0
)
{
return
false
;
}
if
(
device_arg
.
d0_n_length_stride_
[
1
]
!=
1
&&
D0BlockTransferSrcScalarPerVector
!=
1
)
{
return
false
;
}
}
// Note: we need raw lengths since threadwise copy can not handle vector load when part
// Note: we need raw lengths since threadwise copy can not handle vector load when part
// of vector is out of bounds Note: need lowest dim in Ms/Ns/Ks/Os, not merged M/N/K/O
// of vector is out of bounds Note: need lowest dim in Ms/Ns/Ks/Os, not merged M/N/K/O
const
auto
MzRaw
=
device_arg
.
raw_lengths_mz_nz_kz_gemm1nz_
[
0
];
const
auto
MzRaw
=
device_arg
.
raw_lengths_mz_nz_kz_gemm1nz_
[
0
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment