Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
81e68c64
"vscode:/vscode.git/clone" did not exist on "b4eb18dcd60098b950290ba9930b630be84af8d3"
Unverified
Commit
81e68c64
authored
Jan 22, 2025
by
Max Podkorytov
Browse files
copy over fmha example
parent
c5fff071
Changes
26
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
485 additions
and
0 deletions
+485
-0
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
+31
-0
example/ck_tile/18_flexattn/script/run_full_test.sh
example/ck_tile/18_flexattn/script/run_full_test.sh
+46
-0
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
+35
-0
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
+106
-0
example/ck_tile/18_flexattn/utils.hpp
example/ck_tile/18_flexattn/utils.hpp
+266
-0
example/ck_tile/CMakeLists.txt
example/ck_tile/CMakeLists.txt
+1
-0
No files found.
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
0 → 100755
View file @
81e68c64
#!/bin/sh
# Forward-pass FMHA benchmark sweep.
#
# Looks for the tile_example_fmha_fwd executable below the current directory
# and runs it over a grid of precisions, iperm/operm settings, head dimensions
# and batch/sequence-length pairs, sleeping 3 seconds after every run.
#
# TODO: run this script from CK root or build directory

EXE="$(find . -name tile_example_fmha_fwd -type f | head -n 1)"
VALID=0

# Without the executable every command below would fail one by one (each
# followed by a 3-second sleep), so stop right away with a clear message.
if [ -z "$EXE" ]; then
    echo "error: tile_example_fmha_fwd not found below $(pwd)" >&2
    exit 1
fi

# batch:seqlen pairs shared by both sweeps; batch * seqlen is 16384 for each.
SHAPES="32:512 16:1024 8:2048 4:4096 2:8192 1:16384"

for prec in "fp16" "bf16" ; do
for perm in 0 1 ; do
for hdim in 64 128 256 ; do

nhead=$((2048 / $hdim)) # follow fav2 setup

# $SHAPES is intentionally unquoted: it is split into one pair per iteration.
for shape in $SHAPES ; do
    batch=${shape%%:*}
    seqlen=${shape##*:}
    "$EXE" -prec="$prec" -b="$batch" -h="$nhead" -d="$hdim" -s="$seqlen" \
        -iperm="$perm" -operm="$perm" -kname=1 -v="$VALID"
    sleep 3
done

done
done
done

# fp8 sweep: fixed 16 heads of dimension 128, with -squant=1, -vlayout=c and
# all -range_* options set to 240.
for perm in 0 1 ; do
for shape in $SHAPES ; do
    batch=${shape%%:*}
    seqlen=${shape##*:}
    "$EXE" -prec=fp8 -squant=1 -b="$batch" -h=16 -d=128 -s="$seqlen" \
        -iperm="$perm" -operm="$perm" -vlayout=c \
        -range_q=240 -range_k=240 -range_v=240 -range_p=240 -range_o=240 \
        -kname=1 -v="$VALID"
    sleep 3
done
done
\ No newline at end of file
example/ck_tile/18_flexattn/script/run_full_test.sh
0 → 100755
View file @
81e68c64
#!/bin/bash
#
# in order to run this script you'd first need to build the tile_example_fmha_fwd and tile_example_fmha_bwd executables in ../build/bin/
#
# run the script as "./run_full_test.sh <tag for your test environment> <branch name> <host name> <gpu_arch>"
# input arguments:
# environment tag : a string describing the specifics of your test environment
# branch name     : name of the branch in git repo (git status | grep -e 'On branch')
# host name       : $hostname
# gpu architecture: e.g., gfx90a, or gfx942, etc.

#get the command line arguments:
# Each value is exported and echoed back. The expansions are quoted so that a
# tag containing spaces or glob characters is printed verbatim.
export env_type="$1"
echo 'Environment type: ' "$env_type"
export branch="$2"
echo 'Branch name: ' "$branch"
export host_name="$3"
echo 'Host name: ' "$host_name"
export GPU_arch="$4"
echo 'GPU_arch: ' "$GPU_arch"
#######################################
# Start a fresh log file with a header describing the test setup.
# Arguments:
#   $1 - path of the log file; any existing file is removed first
#   $2 - environment tag
#   $3 - branch name
#   $4 - node (host) name
# Outputs:
#   Writes the header lines to the log file. Relies on rocminfo, hipcc and
#   /opt/rocm/bin/amdclang++; a tool that is missing just contributes no line.
# Returns:
#   1 if no log file path was given, otherwise the status of the last
#   pipeline.
#######################################
print_log_header() {
    local log_file=$1
    local env_tag=$2
    local branch_name=$3
    local node_name=$4

    if [[ -z "$log_file" ]]; then
        echo 'print_log_header: missing log file argument' >&2
        return 1
    fi

    rm -f -- "$log_file"
    {
        printf '%s %s\n' 'On branch ' "$branch_name"
        printf '%s %s\n' 'Node name: ' "$node_name"
        #get GPU_arch and number of compute units from rocminfo
        # No newline here: the first matching rocminfo line completes it.
        printf '%s' "GPU_arch: "
        rocminfo | grep "Name:" | grep "gfx"
        rocminfo | grep "Compute Unit:"
        hipcc --version | grep -e 'HIP version'
        printf '%s %s\n' 'Environment type: ' "$env_tag"
        /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir'
    } > "$log_file"
}
#run verification tests
# NOTE(review): the paths below point into example/ck_tile/01_fmha although
# this script is located under 18_flexattn - confirm that this is intended.
example/ck_tile/01_fmha/script/smoke_test_fwd.sh
example/ck_tile/01_fmha/script/smoke_test_bwd.sh

#run performance benchmarks
# The arguments to print_log_header are quoted so that a tag or branch name
# containing spaces stays in its own positional slot.
export fmha_fwd_log="perf_fmha_fwd_${GPU_arch}.log"
print_log_header "$fmha_fwd_log" "$env_type" "$branch" "$host_name"
example/ck_tile/01_fmha/script/benchmark_fwd.sh 2>&1 | tee -a "$fmha_fwd_log"

export fmha_bwd_log="perf_fmha_bwd_${GPU_arch}.log"
print_log_header "$fmha_bwd_log" "$env_type" "$branch" "$host_name"
example/ck_tile/01_fmha/script/benchmark_bwd.sh 2>&1 | tee -a "$fmha_bwd_log"
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
0 → 100755
View file @
81e68c64
#!/bin/sh
# Backward-pass FMHA smoke test.
#
# Looks for the tile_example_fmha_bwd executable below the current directory
# and runs six shape/mask configurations for every combination of precision,
# iperm/operm setting, head dimension, mode, bias type, dbias, dropout
# probability and deterministic flag listed in the loops below.
#
# TODO: run this script from CK root or build directory

EXE="$(find . -name tile_example_fmha_bwd -type f | head -n 1)"
KNAME=1

export CK_WARMUP=0
export CK_REPEAT=1

COMMON_ARGS='-v=1'

# Without the executable every command below would only produce a
# "command not found" error, so stop right away with a clear message.
if [ -z "$EXE" ]; then
    echo "error: tile_example_fmha_bwd not found below $(pwd)" >&2
    exit 1
fi

set -x
for prec in "fp16" "bf16" ; do
for perm in 0 1 ; do
for hdim in 32 64 128 256 ; do
for mode in 0 1 ; do
for bias in "n" "a" ; do
for dbias in 0 ; do
for p_drop in 0.0 0.2 ; do
for deterministic in 0 ; do

# $COMMON_ARGS is intentionally left unquoted so that it can carry several
# space-separated options. The argument order of each call is kept as is.
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=4 -h_k=2 -d="$hdim" -s=259 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=2 -d="$hdim" -s=516 -s_k=253 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=4 -h_k=1 -d="$hdim" -s=500 -s_k=251 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" -mask=1 \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=2 -d="$hdim" -s=900 -s_k=258 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" -mask=2 \
    -v=1 -deterministic="$deterministic" -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=1 -d="$hdim" -s=987 -s_k=219 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" -mask=t:128,30 \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=3 -h_k=1 -d="$hdim" -s=244 -s_k=499 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" \
    -iperm="$perm" -operm="$perm" -mask=b:4,35 \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS

done
done
done
done
done
done
done
done
set +x
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
0 → 100755
View file @
81e68c64
This diff is collapsed.
Click to expand it.
example/ck_tile/18_flexattn/utils.hpp
0 → 100644
View file @
81e68c64
This diff is collapsed.
Click to expand it.
example/ck_tile/CMakeLists.txt
View file @
81e68c64
...
...
@@ -17,4 +17,5 @@ add_subdirectory(14_moe_smoothquant)
# Example subdirectories added to the build, one call per example.
add_subdirectory(15_fused_moe)
add_subdirectory(16_batched_gemm)
add_subdirectory(17_grouped_gemm)
add_subdirectory(18_flexattn)
add_subdirectory(35_batched_transpose)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment