#!/bin/bash
#
# Runs four bf16 GEMM workloads through rocblas-bench, optionally wrapped in
# hipprof.
#
# Usage:
#   <script>           run the four workloads without a profiler
#   <script> --pmc     wrap each run in `hipprof --trace-off --pmc`,
#                      output prefix log/pmc-blas-w<N>-bs<BATCH_SIZE>
#   <script> --trace   wrap each run in `hipprof --hip-trace`,
#                      output prefix log/trace-blas-w<N>-bs<BATCH_SIZE>
#
# Environment:
#   BATCH_SIZE   value passed as the GEMM `-n` dimension (default: 1)

# Directory that holds rocblas-bench.
BENCH_TOOL_DIR=/opt/dtk/lib/rocblas/benchmark_tool

# BW1000
# NUMA binding prefix for every benchmark run. Kept as an array so each word
# stays a separate argument without relying on unquoted word splitting.
BIND_CMD=(numactl -m 1 -N 1)

# GEMM n dimension; defaults to 1 and may be overridden from the environment.
BATCH_SIZE=${BATCH_SIZE:-1}

# One entry per workload, in run order (w1..w4): "m k beta".
WORKLOADS=(
  "11264 4096 0"
  "4096 11264 0"
  "12288 4096 0"
  "4096 4096 1"
)

#######################################
# Build the rocblas-bench argument list for one bf16 gemm_ex workload.
# All four workloads in this script share the same pattern: A transposed,
# B not transposed, lda = ldb = k and ldc = ldd = m.
# Globals:   GEMM_ARGS (written) - resulting argument array
# Arguments: $1 - m, $2 - n, $3 - k, $4 - beta
#######################################
gemm_args() {
  local -r m=$1 n=$2 k=$3 beta=$4
  GEMM_ARGS=(
    -f gemm_ex
    --transposeA T --transposeB N
    -m "${m}" -n "${n}" -k "${k}"
    --alpha 1
    --a_type bf16_r --lda "${k}"
    --b_type bf16_r --ldb "${k}"
    --beta "${beta}"
    --c_type bf16_r --ldc "${m}"
    --d_type bf16_r --ldd "${m}"
    --compute_type f32_r
    --algo 0 --solution_index 0 --flags 0
  )
}

#######################################
# Decide the profiling mode from the command-line arguments.
# Matching is a substring match over all arguments joined together, and
# --pmc wins when both flags are present.
# Arguments: the script's arguments
# Outputs:   "pmc", "trace" or "none" on stdout
#######################################
select_mode() {
  local -r joined="$*"
  if [[ "${joined}" == *"--pmc"* ]]; then
    printf 'pmc\n'
  elif [[ "${joined}" == *"--trace"* ]]; then
    printf 'trace\n'
  else
    printf 'none\n'
  fi
}

#######################################
# Prepare the environment and run every workload, with or without hipprof.
# A failing workload does not stop the remaining ones.
# Arguments: the script's arguments
# Returns:   0 if every run succeeded, otherwise the status of the last
#            run that failed
#######################################
main() {
  local mode
  mode=$(select_mode "$@")

  local -a prof_cmd=()
  case "${mode}" in
    pmc) prof_cmd=(hipprof --trace-off --pmc) ;;
    trace) prof_cmd=(hipprof --hip-trace) ;;
  esac

  # A chmod failure is not treated as fatal; an unusable tool will surface
  # when rocblas-bench itself is invoked.
  chmod u+x "${BENCH_TOOL_DIR}"/*
  export PATH=${BENCH_TOOL_DIR}/:${PATH}
  export HIP_VISIBLE_DEVICES=1
  # export ROCBLAS_TENSILE_GEMM_OVERRIDE_PATH=${PWD}/tensil_gemms.csv

  if (( ${#prof_cmd[@]} > 0 )); then
    # Profiler output goes under log/; make sure the directory exists rather
    # than assuming the profiler creates it.
    mkdir -p log || return
  fi

  local status=0 idx=0
  local spec m k beta
  local -a prefix
  for spec in "${WORKLOADS[@]}"; do
    idx=$((idx + 1))
    read -r m k beta <<<"${spec}"
    gemm_args "${m}" "${BATCH_SIZE}" "${k}" "${beta}"

    prefix=()
    if (( ${#prof_cmd[@]} > 0 )); then
      prefix=("${prof_cmd[@]}" -o "log/${mode}-blas-w${idx}-bs${BATCH_SIZE}")
    fi

    "${prefix[@]}" "${BIND_CMD[@]}" rocblas-bench "${GEMM_ARGS[@]}" || status=$?
  done

  return "${status}"
}

main "$@"