Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
76e95a5e
Commit
76e95a5e
authored
Jul 29, 2024
by
danyao12
Browse files
fix hd128 scratches and boost performance
parent
ad3e94bb
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
3 deletions
+3
-3
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
+2
-2
example/ck_tile/01_fmha/fmha_bwd.cpp
example/ck_tile/01_fmha/fmha_bwd.cpp
+1
-1
No files found.
example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
View file @
76e95a5e
...
@@ -455,7 +455,7 @@ def get_fmha_bwd_dq_dk_dv_tile_ppl_dict_from_dtype(dtype : str) -> Optional[dict
...
@@ -455,7 +455,7 @@ def get_fmha_bwd_dq_dk_dv_tile_ppl_dict_from_dtype(dtype : str) -> Optional[dict
# "kr_ktr_vr"],
# "kr_ktr_vr"],
'64'
:
[
FmhaBwdDQDKDVTileSize
(
64
,
128
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
1
,
4
,
1
,
4
,
1
,
1
,
2
,
2
,
1
,
32
,
32
,
16
,
32
,
32
,
16
,
1
),
'64'
:
[
FmhaBwdDQDKDVTileSize
(
64
,
128
,
64
,
64
,
64
,
64
,
64
,
64
,
64
,
1
,
4
,
1
,
4
,
1
,
1
,
2
,
2
,
1
,
32
,
32
,
16
,
32
,
32
,
16
,
1
),
"kr_ktr_vr"
],
"kr_ktr_vr"
],
# '128' : [FmhaBwdDQDKDVTileSize( 32, 128, 128, 32, 128, 32, 32, 128, 128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 32, 32, 16, 32, 32, 16, 1),
# '128' : [FmhaBwdDQDKDVTileSize( 16, 128, 128, 16, 128, 16, 32, 128, 128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
# "kr_ktr_vr"],
# "kr_ktr_vr"],
# '256' : [FmhaBwdDQDKDVTileSize( 16, 64, 256, 16, 256, 16, 32, 256, 256, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
# '256' : [FmhaBwdDQDKDVTileSize( 16, 64, 256, 16, 256, 16, 32, 256, 256, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
# "kr_ktr_vr"]
# "kr_ktr_vr"]
...
@@ -481,7 +481,7 @@ def get_bwd_dq_dk_dv_blobs(kernel_filter : Optional[str], receipt, mask_impl) ->
...
@@ -481,7 +481,7 @@ def get_bwd_dq_dk_dv_blobs(kernel_filter : Optional[str], receipt, mask_impl) ->
continue
continue
if
((
bias
==
"no"
or
bias
==
"alibi"
)
and
dbias
==
"t"
):
if
((
bias
==
"no"
or
bias
==
"alibi"
)
and
dbias
==
"t"
):
continue
continue
if
(((
hdim
==
64
or
hdim
==
128
)
and
(
"wg16"
in
dropout
))
or
((
hdim
==
32
or
hdim
==
256
)
and
(
"wg32"
in
dropout
))):
if
(((
hdim
==
64
)
and
(
"wg16"
in
dropout
))
or
((
hdim
!=
64
)
and
(
"wg32"
in
dropout
))):
continue
continue
k
=
FmhaBwdDQDKDVKernel
(
F_idx
=
0
,
F_hdim
=
hdim
,
F_dtype
=
dtype
,
F_tile
=
tile
,
k
=
FmhaBwdDQDKDVKernel
(
F_idx
=
0
,
F_hdim
=
hdim
,
F_dtype
=
dtype
,
F_tile
=
tile
,
F_spad
=
spad
,
F_skpad
=
skpad
,
F_dpad
=
dpad
,
F_dvpad
=
dvpad
,
F_spad
=
spad
,
F_skpad
=
skpad
,
F_dpad
=
dpad
,
F_dvpad
=
dvpad
,
...
...
example/ck_tile/01_fmha/fmha_bwd.cpp
View file @
76e95a5e
...
@@ -265,7 +265,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
...
@@ -265,7 +265,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
(
mode
==
mode_enum
::
batch
?
seqlen_q
:
seqstart_q_host
.
back
());
(
mode
==
mode_enum
::
batch
?
seqlen_q
:
seqstart_q_host
.
back
());
const
ck_tile
::
index_t
shape_seqlen_k
=
const
ck_tile
::
index_t
shape_seqlen_k
=
(
mode
==
mode_enum
::
batch
?
seqlen_k
:
seqstart_k_host
.
back
());
(
mode
==
mode_enum
::
batch
?
seqlen_k
:
seqstart_k_host
.
back
());
const
ck_tile
::
index_t
kN0
=
(
hdim_q
>
32
&
hdim_q
<=
128
)
?
128
:
64
;
const
ck_tile
::
index_t
kN0
=
(
hdim_q
<=
128
)
?
128
:
64
;
const
ck_tile
::
index_t
nsplits
=
const
ck_tile
::
index_t
nsplits
=
deterministic
?
ck_tile
::
integer_divide_ceil
(
max_seqlen_k
,
kN0
)
:
1
;
deterministic
?
ck_tile
::
integer_divide_ceil
(
max_seqlen_k
,
kN0
)
:
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment