import os
import time

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm


class Config:
    """Runtime settings for the ImageNet validation benchmark."""

    def __init__(self):
        # Path to the ImageNet validation set.
        self.data_root = "/public/opendas/DL_DATA/ImageNet-pytorch/val"
        # Total batch size (128 per GPU, 1024 across 8 GPUs).
        self.batch_size = 128 * 8
        # Number of DataLoader worker processes.
        self.num_workers = 16
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Report Top-1 and Top-5 accuracy.
        self.top_k = (1, 5)
        # Pinned host memory speeds up host-to-GPU copies; only enabled with CUDA.
        self.pin_memory = torch.cuda.is_available()
        # Number of visible GPUs (0 on a CPU-only host).
        self.num_gpus = torch.cuda.device_count()


# Module-level configuration shared by the functions below.
config = Config()


def prepare_imagenet_data(data_root, batch_size, num_workers, pin_memory):
    """Build the DataLoader for the ImageNet validation set.

    Args:
        data_root: Directory laid out for ``ImageFolder`` (one sub-folder
            per class).
        batch_size: Number of samples per batch.
        num_workers: Number of DataLoader worker processes.
        pin_memory: Whether the loader should use pinned host memory.

    Returns:
        A ``(val_loader, class_names)`` tuple, where ``class_names`` is the
        ``classes`` list of the underlying ``ImageFolder`` dataset.
    """
    # Standard ImageNet per-channel mean / std normalisation.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # Validation pipeline: Resize -> CenterCrop -> ToTensor -> Normalize.
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    val_dataset = torchvision.datasets.ImageFolder(
        root=data_root,
        transform=val_transform,
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,  # evaluation does not need shuffling
        num_workers=num_workers,
        pin_memory=pin_memory,
        drop_last=False,  # keep the final, possibly smaller, batch
    )

    return val_loader, val_dataset.classes


def load_resnet50_model(pretrained=True):
    """Load ResNet50, move it to ``config.device`` and set eval mode.

    When more than one GPU is visible the model is wrapped in
    ``nn.DataParallel``.

    Args:
        pretrained: Load the ImageNet pre-trained weights when true.

    Returns:
        The model in evaluation mode.
    """
    # torchvision >= 0.13 deprecates ``pretrained=`` in favour of ``weights=``;
    # IMAGENET1K_V1 is the checkpoint that ``pretrained=True`` used to load.
    # Older releases do not expose the enum, so fall back to the legacy flag.
    weights_enum = getattr(torchvision.models, "ResNet50_Weights", None)
    if weights_enum is not None:
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        model = torchvision.models.resnet50(weights=weights)
    else:
        model = torchvision.models.resnet50(pretrained=pretrained)

    model = model.to(config.device)

    if config.num_gpus > 1:
        print(f"使用 {config.num_gpus} 个GPU进行推理")
        model = nn.DataParallel(model)  # uses all visible GPUs by default

    model.eval()
    return model


def evaluate(model, val_loader, top_k=(1,)):
    """Run inference over ``val_loader`` and compute Top-K accuracy.

    Args:
        model: Callable mapping an image batch to per-class scores with one
            row per sample.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        top_k: Iterable of K values to report.

    Returns:
        ``(acc, elapsed_time, speed)`` where ``acc`` maps each K to an
        accuracy percentage, ``elapsed_time`` is wall-clock seconds and
        ``speed`` is samples per second. If the loader yields no samples all
        accuracies and the speed are reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    total = 0
    correct = {k: 0 for k in top_k}
    # One top-k call with the largest K serves every requested K, because
    # torch.topk returns its results sorted in descending order.
    max_k = max(top_k, default=0)

    # Gradients are not needed for inference.
    with torch.no_grad():
        start_time = time.time()

        for images, labels in tqdm(val_loader, desc="推理中"):
            images = images.to(config.device, non_blocking=True)
            labels = labels.to(config.device, non_blocking=True)

            outputs = model(images)  # (batch_size, num_classes)

            if max_k:
                _, pred = torch.topk(outputs, max_k, dim=1)  # (batch_size, max_k)
                # Broadcast labels against every predicted rank.
                hits = pred == labels.unsqueeze(1)
                for k in top_k:
                    correct[k] += hits[:, :k].any(dim=1).sum().item()

            total += labels.size(0)

        elapsed_time = time.time() - start_time

    speed = total / elapsed_time if elapsed_time > 0 else 0.0
    acc = {
        k: (correct[k] / total) * 100 if total else 0.0
        for k in top_k
    }

    return acc, elapsed_time, speed


def main():
    """Print the configuration, run the evaluation and report the results."""
    # device_count() is 0 on a CPU-only host; clamp the divisor so the
    # per-device batch size can still be printed.
    per_gpu_batch = config.batch_size // max(config.num_gpus, 1)

    print(f"使用设备: {config.device}")
    print(f"可用GPU数量: {config.num_gpus}")
    print(f"验证集路径: {config.data_root}")
    print(f"总批次大小: {config.batch_size}, 每个GPU批次大小: {per_gpu_batch}")
    print(f"线程数: {config.num_workers}")
    print(f"评估指标: Top-{config.top_k}准确率")

    print("\n加载验证集数据...")
    val_loader, class_names = prepare_imagenet_data(
        config.data_root,
        config.batch_size,
        config.num_workers,
        config.pin_memory,
    )
    print(f"验证集样本总数: {len(val_loader.dataset)}, 类别数: {len(class_names)}")

    print("\n加载ResNet50模型...")
    model = load_resnet50_model(pretrained=True)
    print("模型加载完成(预训练权重)")

    print("\n开始推理...")
    acc, elapsed, speed = evaluate(model, val_loader, config.top_k)

    print("\n===== 推理结果 =====")
    for k in config.top_k:
        print(f"Top-{k} 准确率: {acc[k]:.2f}%")
    print(f"总耗时: {elapsed:.2f}秒")
    print(f"推理速度: {speed:.2f}样本/秒")
    print("====================")


if __name__ == "__main__":
    # Optional: restrict the visible GPUs before running, e.g.
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
    main()