Commit 3b726d83 authored by jerrrrry
Browse files

feat: Run benchmark in pre-configured DCU container via docker executor

parents
# Image declared at the top level of the file.
# GitLab Runner uses this image to create the job container.
image: image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.8.5-ubuntu22.04-dtk25.04-rc7-das1.5-py3.10-20250612-fixpy-rocblas0611-rc2

stages:
  - test

resnet_benchmark_job:
  stage: test
  # Key point: select the runner that was registered with the 'dcu' tag.
  tags:
    - dcu
  script:
    - echo "Running inside the powerful DCU container..."
    # Environment check.
    # The command contains ": " (colon + space), which YAML would parse as a
    # mapping key/value inside a plain scalar, so it is passed as a literal
    # block scalar to keep it a single command string.
    - |
      python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"}')"
    # Run the benchmark script.
    - python benchmark_resnet50.py
  artifacts:
    paths:
      - results.json
import torch
import torchvision.models as models
import time
import json
def _synchronize(device):
    """Wait for queued work on a CUDA *device* to finish; do nothing on CPU."""
    if device.type == "cuda":
        torch.cuda.synchronize(device)


def run_resnet50_benchmark(
    *,
    batch_size=64,
    num_runs=100,
    warmup_runs=10,
    output_path="results.json",
):
    """Run a ResNet50 inference benchmark and save the metrics as JSON.

    The model is loaded with the IMAGENET1K_V2 pretrained weights, fed a
    random input batch, warmed up, and then timed over ``num_runs`` forward
    passes. The metrics are printed and written to ``output_path``.

    Args:
        batch_size: Number of images per forward pass (default 64).
        num_runs: Number of timed forward passes (default 100).
        warmup_runs: Number of untimed warm-up passes (default 10).
        output_path: File the JSON results are written to.

    Raises:
        ValueError: If ``batch_size`` or ``num_runs`` is less than 1, or
            ``warmup_runs`` is negative.
    """
    if batch_size < 1 or num_runs < 1:
        # Both feed the divisor of the latency metric; zero would divide by zero.
        raise ValueError("batch_size and num_runs must both be >= 1")
    if warmup_runs < 0:
        raise ValueError("warmup_runs must be >= 0")

    print(f"--- PyTorch ResNet50 Benchmark (batch_size={batch_size}) ---")

    # 1. Pick the device (GPU when available, otherwise CPU).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")

    # 2. Load the pretrained ResNet50 model.
    print("Loading ResNet50 model with pretrained weights...")
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(device)
    # Evaluation mode: disables training-only layer behaviour.
    model.eval()

    # 3. Prepare the input batch: (batch_size, channels, height, width).
    input_tensor = torch.randn(batch_size, 3, 224, 224, device=device)
    print(f"Input tensor shape: {input_tensor.shape}")

    with torch.no_grad():  # no gradients needed for inference
        # 4. Warm up so one-time initialisation cost is not measured.
        print("Warming up...")
        for _ in range(warmup_runs):
            model(input_tensor)
        # GPU kernels run asynchronously: drain the warm-up work before
        # starting the clock so it is not billed to the timed section.
        _synchronize(device)

        # 5. Timed section.
        print("Running benchmark...")
        start_time = time.perf_counter()
        for _ in range(num_runs):
            model(input_tensor)
        # Wait for the last kernels to finish; otherwise only the launch
        # overhead would be measured on a GPU.
        _synchronize(device)
        total_time = time.perf_counter() - start_time

    # 6. Derive the metrics.
    total_images = num_runs * batch_size
    # Throughput: images processed per second.
    throughput = total_images / total_time
    # Average time spent per image, in milliseconds.
    avg_latency_ms = (total_time / total_images) * 1000

    # 7. Collect the results.
    results = {
        "model": "ResNet50",
        "device": str(device),
        "batch_size": batch_size,
        "num_runs": num_runs,
        "total_time_s": round(total_time, 4),
        "throughput_imgs_per_sec": round(throughput, 2),
        "avg_latency_ms_per_img": round(avg_latency_ms, 4),
    }

    # 8. Print the results and save them to the JSON file.
    print("\n--- Benchmark Results ---")
    print(json.dumps(results, indent=4))
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)
    print(f"\nBenchmark finished. Results saved to {output_path}")


if __name__ == "__main__":
    run_resnet50_benchmark()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment