#!/bin/bash
#
# GEMM Benchmark Shell Script
#
# Runs gemm_benchmark.py over a set of matrix shapes and data types, parses
# the latency / TFLOPS figures it prints, and records one CSV row per test.
# Human-readable progress goes to stdout and to a timestamped log file.
#
# Outputs (created in the current directory):
#   gemm_benchmark_<timestamp>.log          - progress log and failure output
#   gemm_benchmark_results_<timestamp>.csv  - one row per (shape, dtype)

export ROCBLAS_TENSILE_LIBPATH=/opt/dtk-26.04/lib/rocblas/auto_select_test/auto_select_tools/optimization_configs/new/config/library_gpu5/
export HIP_VISIBLE_DEVICES=3

set -euo pipefail

# Colored output (escape sequences are expanded by printf '%b').
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Output files. The timestamp is taken once so that the log and the CSV of a
# single run always carry the same suffix.
RUN_STAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="gemm_benchmark_${RUN_STAMP}.log"
CSV_FILE="gemm_benchmark_results_${RUN_STAMP}.csv"

# Path of the Python benchmark script (relative to the working directory).
PYTHON_SCRIPT="gemm_benchmark.py"

# Default iteration counts passed to the Python script.
WARMUP_ITER=50
BENCH_ITER=1000

# Data types exercised by every test case.
dtypes=(
  "float64"
  "float32"
  "float16"
  "bfloat16"
  "tf32"
  "mixed_fp16_fp32"
  "mixed_bf16_fp32"
  "mixed_int8_int32"
  "mixed_tf32_fp32"
  "w8a8"
)

#######################################
# Print an informational message to stdout and append it to the log.
# Globals:   BLUE, NC, LOG_FILE (read)
# Arguments: $1 - message text
#######################################
print_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print a success message to stdout and append it to the log.
# Globals:   GREEN, NC, LOG_FILE (read)
# Arguments: $1 - message text
#######################################
print_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print an error message to stdout and append it to the log.
# Globals:   RED, NC, LOG_FILE (read)
# Arguments: $1 - message text
#######################################
print_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Print a section banner (blank line, rule, title, rule) to stdout and log.
# Globals:   YELLOW, NC, LOG_FILE (read)
# Arguments: $1 - section title
#######################################
print_section() {
  local rule='========================================'
  {
    printf '\n%b%s%b\n' "$YELLOW" "$rule" "$NC"
    printf '%b%s%b\n' "$YELLOW" "$1" "$NC"
    printf '%b%s%b\n' "$YELLOW" "$rule" "$NC"
  } | tee -a "$LOG_FILE"
}

#######################################
# Create (or truncate) the CSV file and write its header row.
# Globals:   CSV_FILE (read)
#######################################
init_csv() {
  echo "Shape_M,Shape_K,Shape_N,DataType,Latency_us,TFLOPS,Status" > "$CSV_FILE"
}

#######################################
# Run one benchmark configuration and append its result to the CSV.
# A test counts as successful only when the Python script exits with status 0
# and both the latency and TFLOPS values could be parsed from its output.
# Otherwise a FAILED row is written and the captured output is saved to the
# log; the suite then continues with the next test.
# Globals:   PYTHON_SCRIPT, WARMUP_ITER, BENCH_ITER, CSV_FILE, LOG_FILE (read)
# Arguments: $1 - M, $2 - K, $3 - N, $4 - dtype name
#######################################
run_test() {
  local M=$1
  local K=$2
  local N=$3
  local dtype=$4
  local output latency tflops
  local rc=0

  print_info "Testing: M=$M, K=$K, N=$N, dtype=$dtype"

  # Capture stdout+stderr. '|| rc=$?' keeps 'set -e' from aborting the whole
  # suite when a single configuration fails.
  output=$(python3 "$PYTHON_SCRIPT" \
    --M "$M" \
    --K "$K" \
    --N "$N" \
    --dtype "$dtype" \
    --warmup_iterations "$WARMUP_ITER" \
    --bench_iterations "$BENCH_ITER" --transA 2>&1) || rc=$?

  # Extract latency and TFLOPS. Only the first matching line is used so that
  # a repeated label cannot inject a newline into the CSV row.
  latency=$(awk '/Average latency:/ {print $3; exit}' <<<"$output")
  tflops=$(awk '/Performance:/ {print $2; exit}' <<<"$output")

  if [[ "$rc" -eq 0 && -n "$latency" && -n "$tflops" ]]; then
    print_success " Latency: ${latency} μs, TFLOPS: ${tflops}"
    echo "$M,$K,$N,$dtype,$latency,$tflops,SUCCESS" >> "$CSV_FILE"
  else
    print_error " Test failed for $dtype"
    echo "$M,$K,$N,$dtype,0,0,FAILED" >> "$CSV_FILE"
    # Keep the evidence: without it a FAILED row cannot be diagnosed later.
    {
      printf 'exit code: %s\n' "$rc"
      printf '%s\n' "$output"
    } >> "$LOG_FILE"
  fi

  echo "" >> "$LOG_FILE"
}

#######################################
# Test case 1: square matrices with M = K = N, power-of-two sizes.
# Globals:   dtypes (read)
#######################################
test_power_of_two() {
  print_section "Test Case 1: Square matrices (power of 2)"

  local sizes=(128 256 512 1024 2048 4096 8192)
  local size dtype

  for size in "${sizes[@]}"; do
    print_info "Testing square matrix: $size x $size"
    for dtype in "${dtypes[@]}"; do
      run_test "$size" "$size" "$size" "$dtype"
    done
  done
}

#######################################
# Test case 2: square matrices with M = K = N, non-aligned sizes.
# Globals:   dtypes (read)
#######################################
test_non_aligned() {
  print_section "Test Case 2: Square matrices (non-aligned)"

  local sizes=(4098 8190)
  local size dtype

  for size in "${sizes[@]}"; do
    print_info "Testing square matrix: $size x $size"
    for dtype in "${dtypes[@]}"; do
      run_test "$size" "$size" "$size" "$dtype"
    done
  done
}

#######################################
# Test case 3: one specific rectangular shape, run for every data type.
# Globals:   dtypes (read)
#######################################
test_specific_shape() {
  print_section "Test Case 3: Specific shape (M=8192, K=768, N=8192)"

  local dtype
  for dtype in "${dtypes[@]}"; do
    run_test 8192 768 8192 "$dtype"
  done
}

#######################################
# Verify that torch reports an available CUDA device; exit 1 otherwise.
# On success, the reported version and device name are printed and logged.
# Globals:   LOG_FILE (read)
#######################################
check_cuda() {
  local cuda_version gpu_name

  print_info "Checking CUDA availability..."

  # stderr is silenced on purpose: a missing torch or device is reported by
  # the error branch below rather than by a Python traceback.
  if python3 -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then
    cuda_version=$(python3 -c "import torch; print(torch.version.cuda)")
    gpu_name=$(python3 -c "import torch; print(torch.cuda.get_device_name(0))")
    print_success "CUDA available: $cuda_version"
    print_success "GPU: $gpu_name"
    echo "CUDA Version: $cuda_version" >> "$LOG_FILE"
    echo "GPU: $gpu_name" >> "$LOG_FILE"
  else
    print_error "CUDA not available. Exiting."
    exit 1
  fi
}

#######################################
# Entry point: check the environment, run all test cases, print a summary.
# Globals:   LOG_FILE, CSV_FILE, PYTHON_SCRIPT, GREEN, NC (read)
#######################################
main() {
  print_info "Starting GEMM Benchmark Suite"
  print_info "Log file: $LOG_FILE"
  print_info "Results CSV: $CSV_FILE"

  # Fail fast instead of producing a CSV that contains only FAILED rows.
  if [[ ! -f "$PYTHON_SCRIPT" ]]; then
    print_error "Benchmark script not found: $PYTHON_SCRIPT"
    exit 1
  fi

  # Check CUDA
  check_cuda

  # Initialize the CSV
  init_csv

  # Record system information
  {
    echo "System Information:"
    echo "Date: $(date)"
    echo "Hostname: $(hostname)"
    echo "Python: $(command -v python3)"
    echo "PyTorch: $(python3 -c 'import torch; print(torch.__version__)')"
    echo ""
  } >> "$LOG_FILE"

  # Run the tests
  test_power_of_two
  test_non_aligned
  test_specific_shape

  print_section "Benchmark Complete"
  print_success "All tests finished. Results saved to $CSV_FILE"
  print_info "Log saved to $LOG_FILE"

  # Show the results summary
  printf '\n%bResults Summary:%b\n' "$GREEN" "$NC"
  echo "========================================="
  echo "CSV file: $CSV_FILE"
  echo "Log file: $LOG_FILE"
  echo ""
  echo "To view results:"
  echo " cat $CSV_FILE"
  echo " or use a spreadsheet application"
}

# Run the main function
main "$@"