Commit 027c8da0 authored by one
Browse files

Add scripts for ROCBLAS benchmarking and Evo2 model execution

parent 3e8ab137
#!/bin/bash
# Tally the distinct rocblas-bench invocations recorded in a log file.
#
# Usage:  <script> [LOG_FILE]
#   LOG_FILE  file to scan (default: count.txt in the current directory).
# Output: every distinct line containing 'rocblas-bench', prefixed with the
#         number of times it occurs, most frequent first.

#######################################
# Count identical lines mentioning rocblas-bench.
# Arguments: $1 - log file to scan (optional, defaults to count.txt)
# Outputs:   "<count> <line>" rows on stdout, sorted by descending count
#######################################
count_rocblas_calls() {
  local log_file=${1:-count.txt}
  # The path is quoted so names containing spaces or glob characters are
  # passed to grep as a single operand; `--` ends option parsing.
  grep -- 'rocblas-bench' "${log_file}" | sort | uniq -c | sort -nr
}

count_rocblas_calls "$@"
#!/bin/bash
export HIP_VISIBLE_DEVICES=1
chmod u+x /opt/dtk/lib/rocblas/benchmark_tool/rocblas-bench
PROF_CMD='hipprof --hip-trace'
BENCH_CMD='numactl -m 0 -N 0 /opt/dtk/lib/rocblas/benchmark_tool/rocblas-bench'
BATCH_SIZE=1
LOG_PREFIX=log/trace-rocblas
echo
echo "===== Kernel 1 ====="
#RUN_PROF="${PROF_CMD} -o ${LOG_PREFIX}-k1-bs${BATCH_SIZE}"
${RUN_PROF} ${BENCH_CMD} -f gemm_ex --transposeA T --transposeB N -m 11264 -n ${BATCH_SIZE} -k 4096 --alpha 1 --a_type bf16_r --lda 4096 --b_type bf16_r --ldb 4096 --beta 0 --c_type bf16_r --ldc 11264 --d_type bf16_r --ldd 11264 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
echo
echo "===== Kernel 2 ====="
#RUN_PROF="${PROF_CMD} -o ${LOG_PREFIX}-k2-bs${BATCH_SIZE}"
${RUN_PROF} ${BENCH_CMD} -f gemm_ex --transposeA T --transposeB N -m 4096 -n ${BATCH_SIZE} -k 11264 --alpha 1 --a_type bf16_r --lda 11264 --b_type bf16_r --ldb 11264 --beta 0 --c_type bf16_r --ldc 4096 --d_type bf16_r --ldd 4096 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
echo
echo "===== Kernel 3 ====="
#RUN_PROF="${PROF_CMD} -o ${LOG_PREFIX}-k3-bs${BATCH_SIZE}"
${RUN_PROF} ${BENCH_CMD} -f gemm_ex --transposeA T --transposeB N -m 12288 -n ${BATCH_SIZE} -k 4096 --alpha 1 --a_type bf16_r --lda 4096 --b_type bf16_r --ldb 4096 --beta 0 --c_type bf16_r --ldc 12288 --d_type bf16_r --ldd 12288 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
echo
echo "===== Kernel 4 ====="
#RUN_PROF="${PROF_CMD} -o ${LOG_PREFIX}-k4-bs${BATCH_SIZE}"
${RUN_PROF} ${BENCH_CMD} -f gemm_ex --transposeA T --transposeB N -m 4096 -n ${BATCH_SIZE} -k 4096 --alpha 1 --a_type bf16_r --lda 4096 --b_type bf16_r --ldb 4096 --beta 1 --c_type bf16_r --ldc 4096 --d_type bf16_r --ldd 4096 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
#!/bin/bash
# Run the batched Evo2 7B generation test under `hipprof --hip-trace`.
#
# Environment:
#   BATCH_SIZE  value passed to --batch_size and embedded in the trace file
#               name (default: 2).
# Output: trace written to log/trace-padding-bs<BATCH_SIZE>.

export MODEL_PATH=/models/arcinstitute/evo2_7b
export HIP_VISIBLE_DEVICES=1

# Optional MIOpen / rocBLAS logging; uncomment to enable.
#export MIOPEN_ENABLE_LOGGING_CMD=1
#export MIOPEN_ENABLE_LOGGING=1
#export MIOPEN_LOG_LEVEL=6
#export ROCBLAS_LAYER=3

BATCH_SIZE=${BATCH_SIZE:-2}
TRACE_OUT="log/trace-padding-bs${BATCH_SIZE}"

# The command is an array so each argument (notably the checkpoint path)
# reaches the program intact instead of relying on word-splitting.
EVO2_CMD=(
  numactl -m 0 -N 0
  python -m evo2.test.test_evo2_generation_batched
  --model_name evo2_7b
  --local_path "${MODEL_PATH}/evo2_7b.pt"
  --batch_size "${BATCH_SIZE}"
)
# Non-batched variant:
# EVO2_CMD=(numactl -m 0 -N 0 python -m evo2.test.test_evo2_generation --model_name evo2_7b --local_path "${MODEL_PATH}/evo2_7b.pt")

# Ensure the trace directory exists before the profiler writes to it.
mkdir -p -- "$(dirname -- "${TRACE_OUT}")"

# To run without profiling, use instead:
#"${EVO2_CMD[@]}"
hipprof --hip-trace -o "${TRACE_OUT}" "${EVO2_CMD[@]}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment