Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
81e68c64
Unverified
Commit
81e68c64
authored
Jan 22, 2025
by
Max Podkorytov
Browse files
copy over fmha example
parent
c5fff071
Changes
26
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
485 additions
and
0 deletions
+485
-0
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
+31
-0
example/ck_tile/18_flexattn/script/run_full_test.sh
example/ck_tile/18_flexattn/script/run_full_test.sh
+46
-0
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
+35
-0
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
+106
-0
example/ck_tile/18_flexattn/utils.hpp
example/ck_tile/18_flexattn/utils.hpp
+266
-0
example/ck_tile/CMakeLists.txt
example/ck_tile/CMakeLists.txt
+1
-0
No files found.
example/ck_tile/18_flexattn/script/benchmark_fwd.sh
0 → 100755
View file @
81e68c64
#!/bin/sh
# Forward-pass benchmark sweep.
# TODO: run this script from CK root or build directory
#
# Picks the first regular file named tile_example_fmha_fwd found below the
# current directory and runs it over a grid of precisions, permutation flags,
# head dimensions and sequence lengths, sleeping 3 seconds after every run.

EXE="$(find . -name tile_example_fmha_fwd -type f | head -n 1)"
VALID=0

# Fail fast when the binary is missing. Without this guard the shell would try
# to execute "-prec=..." as a command for every configuration, sleep through
# the whole sweep and still exit with status 0.
if [ -z "$EXE" ]; then
    echo "error: tile_example_fmha_fwd not found under $(pwd)" >&2
    exit 1
fi

# fp16 / bf16 sweep.
for prec in "fp16" "bf16" ; do
for perm in 0 1 ; do
for hdim in 64 128 256 ; do

    nhead=$((2048 / $hdim)) # follow fav2 setup

    # batch * seqlen is held at 16384: 32x512, 16x1024, ..., 1x16384.
    for seqlen in 512 1024 2048 4096 8192 16384 ; do
        batch=$((16384 / $seqlen))
        "$EXE" -prec="$prec" -b="$batch" -h="$nhead" -d="$hdim" -s="$seqlen" \
            -iperm="$perm" -operm="$perm" -kname=1 -v="$VALID" ; sleep 3
    done

done
done
done

# fp8 sweep: fixed 16 heads of dimension 128, same batch/seqlen pairs as above.
for perm in 0 1 ; do

    for seqlen in 512 1024 2048 4096 8192 16384 ; do
        batch=$((16384 / $seqlen))
        "$EXE" -prec=fp8 -squant=1 -b="$batch" -h=16 -d=128 -s="$seqlen" \
            -iperm="$perm" -operm="$perm" -vlayout=c \
            -range_q=240 -range_k=240 -range_v=240 -range_p=240 -range_o=240 \
            -kname=1 -v="$VALID" ; sleep 3
    done

done
\ No newline at end of file
example/ck_tile/18_flexattn/script/run_full_test.sh
0 → 100755
View file @
81e68c64
#!/bin/bash
#
# in order to run this script you'd first need to build the tile_example_fmha_fwd and tile_example_fmha_bwd executables in ../build/bin/
#
# run the script as "./run_full_test.sh <tag for your test environment> <branch name> <host name> <gpu_arch>"
# input arguments:
# environment tag : a string describing the specifics of your test environment
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
# host name : $hostname
# gpu architecture: e.g., gfx90a, or gfx942, etc.
#get the command line arguments:
# Export the four positional arguments under descriptive names and echo each
# one back. The expansions are quoted so that a tag containing spaces or glob
# characters (e.g. "*") is printed verbatim instead of being word-split or
# expanded against the current directory.
export env_type="$1"
echo 'Environment type: ' "$env_type"
export branch="$2"
echo 'Branch name: ' "$branch"
export host_name="$3"
echo 'Host name: ' "$host_name"
export GPU_arch="$4"
echo 'GPU_arch: ' "$GPU_arch"
#######################################
# Start a fresh log file and write a header describing the test run.
# Arguments:
#   $1 - path of the log file; any existing file at that path is removed first
#   $2 - environment type tag
#   $3 - branch name
#   $4 - node name
# Outputs:
#   Writes the header lines to the log file. The header also includes the
#   grep-filtered output of rocminfo, hipcc --version and
#   /opt/rocm/bin/amdclang++ --version.
#######################################
function print_log_header(){
    local log_file="$1"
    local env_tag="$2"
    local branch_name="$3"
    local node_name="$4"

    # All expansions are quoted: an unquoted redirect target containing spaces
    # is an "ambiguous redirect" error in bash, and unquoted values would be
    # word-split and glob-expanded.
    rm -f -- "$log_file"
    echo 'On branch ' "$branch_name" &> "$log_file"
    echo 'Node name: ' "$node_name" >> "$log_file"
    #get GPU_arch and number of compute units from rocminfo
    echo -n "GPU_arch: " >> "$log_file"
    rocminfo | grep "Name:" | grep "gfx" >> "$log_file"
    rocminfo | grep "Compute Unit:" >> "$log_file"
    hipcc --version | grep -e 'HIP version' >> "$log_file"
    echo 'Environment type: ' "$env_tag" >> "$log_file"
    /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> "$log_file"
}
#run verification tests
# NOTE(review): this script lives under example/ck_tile/18_flexattn/script but
# invokes the 01_fmha copies of the test scripts - confirm that is intended.
example/ck_tile/01_fmha/script/smoke_test_fwd.sh
example/ck_tile/01_fmha/script/smoke_test_bwd.sh

#run performance benchmarks
# The arguments to print_log_header are quoted so that an empty or
# space-containing environment tag / branch / host name keeps its positional
# slot instead of shifting the remaining arguments.
export fmha_fwd_log="perf_fmha_fwd_${GPU_arch}.log"
print_log_header "$fmha_fwd_log" "$env_type" "$branch" "$host_name"
example/ck_tile/01_fmha/script/benchmark_fwd.sh 2>&1 | tee -a "$fmha_fwd_log"

export fmha_bwd_log="perf_fmha_bwd_${GPU_arch}.log"
print_log_header "$fmha_bwd_log" "$env_type" "$branch" "$host_name"
example/ck_tile/01_fmha/script/benchmark_bwd.sh 2>&1 | tee -a "$fmha_bwd_log"
example/ck_tile/18_flexattn/script/smoke_test_bwd.sh
0 → 100755
View file @
81e68c64
#!/bin/sh
# Backward-pass smoke test.
# TODO: run this script from CK root or build directory
#
# Picks the first regular file named tile_example_fmha_bwd found below the
# current directory and runs it, with command tracing enabled, over every
# combination of the loop variables below. Each combination is exercised with
# six different batch / head / sequence-length / mask settings.

EXE="$(find . -name tile_example_fmha_bwd -type f | head -n 1)"
KNAME=1

# Fail fast when the binary is missing. Without this guard every invocation
# below would try to execute "-prec=..." as a command and the script would
# still finish with exit status 0.
if [ -z "$EXE" ]; then
    echo "error: tile_example_fmha_bwd not found under $(pwd)" >&2
    exit 1
fi

export CK_WARMUP=0
export CK_REPEAT=1

# Extra arguments appended to every invocation. Expanded unquoted on purpose
# so that several space-separated arguments can be stored here.
COMMON_ARGS='-v=1'

set -x
for prec in "fp16" "bf16" ; do
for perm in 0 1 ; do
for hdim in 32 64 128 256 ; do
for mode in 0 1 ; do
for bias in "n" "a" ; do
for dbias in 0 ; do
for p_drop in 0.0 0.2 ; do
for deterministic in 0 ; do

# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=4 -h_k=2 -d="$hdim" -s=259 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=2 -d="$hdim" -s=516 -s_k=253 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=4 -h_k=1 -d="$hdim" -s=500 -s_k=251 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -mask=1 -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=1 -h=2 -d="$hdim" -s=900 -s_k=258 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -mask=2 -v=1 -deterministic="$deterministic" -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=1 -d="$hdim" -s=987 -s_k=219 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -mask=t:128,30 -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS
# shellcheck disable=SC2086
"$EXE" -prec="$prec" -b=2 -h=3 -h_k=1 -d="$hdim" -s=244 -s_k=499 \
    -bias="$bias" -dbias="$dbias" -p_drop="$p_drop" -iperm="$perm" -operm="$perm" \
    -mask=b:4,35 -deterministic="$deterministic" -v=1 -mode="$mode" -kname="$KNAME" $COMMON_ARGS

done
done
done
done
done
done
done
done
set +x
example/ck_tile/18_flexattn/script/smoke_test_fwd.sh
0 → 100755
View file @
81e68c64
This diff is collapsed.
Click to expand it.
example/ck_tile/18_flexattn/utils.hpp
0 → 100644
View file @
81e68c64
This diff is collapsed.
Click to expand it.
example/ck_tile/CMakeLists.txt
View file @
81e68c64
...
@@ -17,4 +17,5 @@ add_subdirectory(14_moe_smoothquant)
...
@@ -17,4 +17,5 @@ add_subdirectory(14_moe_smoothquant)
add_subdirectory
(
15_fused_moe
)
add_subdirectory
(
15_fused_moe
)
add_subdirectory
(
16_batched_gemm
)
add_subdirectory
(
16_batched_gemm
)
add_subdirectory
(
17_grouped_gemm
)
add_subdirectory
(
17_grouped_gemm
)
add_subdirectory
(
18_flexattn
)
add_subdirectory
(
35_batched_transpose
)
add_subdirectory
(
35_batched_transpose
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment