run-rocblas.sh 2.25 KB
Newer Older
one's avatar
one committed
1
2
3
4
5
#!/bin/bash

# Directory holding the rocBLAS benchmark tools shipped with DTK.
rocblas_tool_dir=/opt/dtk/lib/rocblas/benchmark_tool

# Give the owning user execute permission on everything in that directory,
# then put the directory first on PATH so its tools can be called by bare name.
chmod u+x "${rocblas_tool_dir}"/*
PATH="${rocblas_tool_dir}/:${PATH}"
export PATH

6
7
# BW1000
# Command prefix placed in front of every rocblas-bench invocation.
# numactl -m 1 allocates memory only from NUMA node 1 and -N 1 runs the
# process only on the CPUs of node 1.
# NOTE(review): node 1 is presumably the node closest to the GPU used on a
# BW1000 system -- confirm before running on other hardware.
BIND_CMD='numactl -m 1 -N 1'
one's avatar
one committed
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Value used for the GEMM -n dimension and embedded in the profiler output
# file names (the "-bs<N>" suffix).
BATCH_SIZE=1

# Restrict the HIP runtime to device index 1.
export HIP_VISIBLE_DEVICES=1
# Optional Tensile GEMM override table, disabled by default.
# (Uses ${PWD}; the previous "$(PWD)" form would have tried to run a command
# named PWD.)
# export ROCBLAS_TENSILE_GEMM_OVERRIDE_PATH=${PWD}/tensil_gemms.csv

# Print the rocblas-bench argument string for one bf16 gemm_ex workload.
# Arguments: $1 - m, $2 - k, $3 - beta
# Globals:   BATCH_SIZE (read) - used as n
# In every workload below lda and ldb equal k, and ldc and ldd equal m, so
# only m, k and beta need to be supplied.
_gemm_workload() {
    printf -- '-f gemm_ex --transposeA T --transposeB N -m %s -n %s -k %s --alpha 1 --a_type bf16_r --lda %s --b_type bf16_r --ldb %s --beta %s --c_type bf16_r --ldc %s --d_type bf16_r --ldd %s --compute_type f32_r --algo 0 --solution_index 0 --flags 0' \
        "$1" "${BATCH_SIZE}" "$2" "$2" "$2" "$3" "$1" "$1"
}

#                      m     k     beta
W1=$(_gemm_workload 11264  4096  0)
W2=$(_gemm_workload  4096 11264  0)
W3=$(_gemm_workload 12288  4096  0)
W4=$(_gemm_workload  4096  4096  1)

# Run the four GEMM workloads (W1..W4) through rocblas-bench, optionally
# wrapped in hipprof.
#
# Mode is chosen by substring match on the arguments; --pmc is checked first,
# so it wins when both flags are present:
#   --pmc    hipprof --trace-off --pmc -o log/pmc-blas-w<N>-bs<BATCH_SIZE>
#   --trace  hipprof --hip-trace       -o log/trace-blas-w<N>-bs<BATCH_SIZE>
#   (none)   plain rocblas-bench run, no profiler
#
# Arguments: the script's own command-line arguments
# Globals:   BIND_CMD, BATCH_SIZE, W1..W4 (read); PROF_CMD (written in the
#            profiling modes)
run_rocblas_workloads() {
    local tag="" idx workload

    if [[ "$*" == *"--pmc"* ]]; then
        PROF_CMD="hipprof --trace-off --pmc"
        tag="pmc"
    elif [[ "$*" == *"--trace"* ]]; then
        PROF_CMD="hipprof --hip-trace"
        tag="trace"
    fi

    # The profiler output prefix lives under log/; create the directory up
    # front so a fresh checkout does not depend on it already existing.
    if [[ -n "${tag}" ]]; then
        mkdir -p log
    fi

    for idx in 1 2 3 4; do
        workload="W${idx}"
        # PROF_CMD, BIND_CMD and the workload string are deliberately left
        # unquoted: each holds a whitespace-separated argument list that must
        # be split into separate words.
        if [[ -n "${tag}" ]]; then
            ${PROF_CMD} -o "log/${tag}-blas-w${idx}-bs${BATCH_SIZE}" ${BIND_CMD} rocblas-bench ${!workload}
        else
            ${BIND_CMD} rocblas-bench ${!workload}
        fi
    done
}

run_rocblas_workloads "$@"