# test.sh
# Run the same attention-forward problem through two implementations, one
# after the other: the Python example script and the Composable Kernel
# tile_example_fmha_fwd binary.
#
# The problem shape is defined once below so both commands always receive
# the same values. Each value can be overridden from the environment, e.g.
#   FMHA_SEQ_LEN=8192 sh test.sh
# The FMHA_ prefix keeps the overrides clear of shell-reserved names (bash
# already defines GROUPS as a special variable).
batch="${FMHA_BATCH:-2}"
heads="${FMHA_HEADS:-16}"
seq_len="${FMHA_SEQ_LEN:-4096}"
dim="${FMHA_DIM:-128}"
groups="${FMHA_GROUPS:-2}"

# The example script path is relative, so this must be launched from the
# directory that contains examples/.
/root/miniconda3/envs/py312/bin/python3 examples/amd/example_amd_flash_attn_fwd.py \
    --batch "$batch" \
    --heads "$heads" \
    --seq_len "$seq_len" \
    --dim "$dim" \
    --is_causal \
    --groups "$groups"

# NOTE(review): the run above passes --groups, but no corresponding option is
# passed here -- confirm both runs really use the same head configuration.
# NOTE(review): -mask=t is presumably the counterpart of --is_causal and -v=1
# presumably enables result validation -- verify against the binary's help.
/root/composable_kernel/build/bin/tile_example_fmha_fwd \
    -b="$batch" -h="$heads" -s="$seq_len" -d="$dim" \
    -mask=t -v=1 -warmup=5 -repeat=20