feat: Run benchmark in pre-configured DCU container via docker executor

3b726d83 · jerrrrry · 3b726d83 · 3b726d83
Commit 3b726d83 authored Jan 16, 2026 by jerrrrry
Hide whitespace changes
Inline Side-by-side

Showing with 97 additions and 0 deletions

.gitlab-ci.yml .gitlab-ci.yml +21 -0

benchmark_resnet50.py benchmark_resnet50.py +76 -0

No files found.
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+# Specify the image to use at the top level of the file.
+# GitLab Runner uses this image to create the job container.
+image: image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.8.5-ubuntu22.04-dtk25.04-rc7-das1.5-py3.10-20250612-fixpy-rocblas0611-rc2
+
+stages:
+  - test
+
+resnet_benchmark_job:
+  stage: test
+  # Key point: run on the Runner we just registered, which carries the 'dcu' tag.
+  tags:
+    - dcu
+  script:
+    - echo "Running inside the powerful DCU container..."
+    # Environment check. The command is written as a literal block scalar (|)
+    # because it contains ": " sequences, which YAML would otherwise treat as a
+    # mapping key/value separator inside a plain scalar, so the script item
+    # would no longer be a plain string.
+    - |
+      python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"}')"
+    # Run the benchmark script.
+    - python benchmark_resnet50.py
+  artifacts:
+    paths:
+      - results.json
--- a/benchmark_resnet50.py
+++ b/benchmark_resnet50.py
+import torch
+import torchvision.models as models
+import time
+import json
+
+def run_resnet50_benchmark():
+    """运行 ResNet50 推理基准测试，batch_size=64"""
+    print("--- PyTorch ResNet50 Benchmark (batch_size=64) ---")
+
+    # 1. 检查设备 (CPU or GPU)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Running on device: {device}")
+
+    # 2. 加载预训练的 ResNet50 模型
+    print("Loading ResNet50 model with pretrained weights...")
+    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(device)
+    # 设置为评估模式，关闭 Dropout 等训练层
+    model.eval()
+
+    # 3. 准备输入数据，batch_size=64
+    batch_size = 64
+    # ImageNet 标准输入尺寸: (batch_size, channels, height, width)
+    input_tensor = torch.randn(batch_size, 3, 224, 224).to(device)
+    print(f"Input tensor shape: {input_tensor.shape}")
+
+    # 4. 预热 (非常重要！避免首次运行的初始化开销影响结果)
+    print("Warming up...")
+    with torch.no_grad():  # 关闭梯度计算，节省内存和计算
+        for _ in range(10):
+            _ = model(input_tensor)
+
+    # 5. 正式进行基准测试
+    print("Running benchmark...")
+    num_runs = 100
+    start_time = time.time()
+
+    with torch.no_grad():
+        for _ in range(num_runs):
+            _ = model(input_tensor)
+
+    end_time = time.time()
+
+    # 6. 计算性能指标
+    total_time = end_time - start_time
+    total_images = num_runs * batch_size
+    
+    # 吞吐量: 每秒处理的图片数量
+    throughput = total_images / total_time
+    
+    # 平均延迟: 处理单张图片的平均时间 (毫秒)
+    avg_latency_ms = (total_time / total_images) * 1000
+
+    # 7. 整理结果
+    results = {
+        "model": "ResNet50",
+        "device": str(device),
+        "batch_size": batch_size,
+        "num_runs": num_runs,
+        "total_time_s": round(total_time, 4),
+        "throughput_imgs_per_sec": round(throughput, 2),
+        "avg_latency_ms_per_img": round(avg_latency_ms, 4)
+    }
+
+    # 8. 打印并保存结果到 JSON 文件
+    print("\n--- Benchmark Results ---")
+    print(json.dumps(results, indent=4))
+    
+    with open("results.json", "w") as f:
+        json.dump(results, f, indent=4)
+        
+    print(f"\nBenchmark finished. Results saved to results.json")
+
+
+# Run the benchmark only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    run_resnet50_benchmark()
+