import json
import time

import torch
import torchvision.models as models


def _synchronize(device):
    """Wait for all queued CUDA work on *device* to finish; no-op on CPU."""
    if device.type == "cuda":
        torch.cuda.synchronize(device)


def run_resnet50_benchmark(batch_size=64, num_runs=100, warmup_runs=10,
                           output_path="results.json"):
    """Run a ResNet50 inference benchmark and save the metrics as JSON.

    The model is loaded with the IMAGENET1K_V2 pretrained weights, placed on
    the GPU when CUDA is available (CPU otherwise), and fed a random input
    batch. After a warm-up phase, ``num_runs`` forward passes are timed.

    Args:
        batch_size: Number of images per forward pass (default 64).
        num_runs: Number of timed forward passes (default 100).
        warmup_runs: Number of untimed forward passes executed first
            (default 10).
        output_path: File the JSON results are written to
            (default "results.json").

    Returns:
        The results dictionary that was printed and written to
        ``output_path``.

    Raises:
        ValueError: If ``batch_size`` or ``num_runs`` is less than 1, or
            ``warmup_runs`` is negative.
    """
    if batch_size < 1 or num_runs < 1:
        raise ValueError("batch_size and num_runs must be at least 1")
    if warmup_runs < 0:
        raise ValueError("warmup_runs must not be negative")

    print(f"--- PyTorch ResNet50 Benchmark (batch_size={batch_size}) ---")

    # 1. Pick the device (GPU if available, otherwise CPU).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")

    # 2. Load the pretrained ResNet50 model.
    print("Loading ResNet50 model with pretrained weights...")
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(device)
    # Evaluation mode: disables training-only behavior such as Dropout.
    model.eval()

    # 3. Prepare the input batch.
    # Standard ImageNet input layout: (batch_size, channels, height, width).
    input_tensor = torch.randn(batch_size, 3, 224, 224, device=device)
    print(f"Input tensor shape: {input_tensor.shape}")

    # 4. Warm up so one-time initialization cost does not skew the result.
    print("Warming up...")
    with torch.no_grad():  # No gradients needed: saves memory and compute.
        for _ in range(warmup_runs):
            model(input_tensor)

    # 5. Timed benchmark.
    print("Running benchmark...")
    # CUDA kernels are launched asynchronously. Synchronize before starting
    # the clock (so leftover warm-up work is not counted) and before stopping
    # it (so all timed work has actually completed).
    _synchronize(device)
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    with torch.no_grad():
        for _ in range(num_runs):
            model(input_tensor)
    _synchronize(device)
    end_time = time.perf_counter()

    # 6. Compute the performance metrics.
    total_time = end_time - start_time
    total_images = num_runs * batch_size
    # Throughput: images processed per second.
    throughput = total_images / total_time
    # Average latency: amortized time per single image, in milliseconds.
    avg_latency_ms = (total_time / total_images) * 1000

    # 7. Collect the results.
    results = {
        "model": "ResNet50",
        "device": str(device),
        "batch_size": batch_size,
        "num_runs": num_runs,
        "total_time_s": round(total_time, 4),
        "throughput_imgs_per_sec": round(throughput, 2),
        "avg_latency_ms_per_img": round(avg_latency_ms, 4),
    }

    # 8. Print the results and save them to a JSON file.
    print("\n--- Benchmark Results ---")
    print(json.dumps(results, indent=4))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)

    print(f"\nBenchmark finished. Results saved to {output_path}")
    return results


if __name__ == "__main__":
    run_resnet50_benchmark()