Commit 84b2558d authored by yangzhong
Browse files

8卡推理代码

parent 7a436760
import os
import time
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
# 配置参数
class Config:
    """Runtime settings for ResNet50 inference on the ImageNet validation set.

    All values are fixed when the object is constructed; the device-related
    attributes are probed from ``torch.cuda`` at that moment.
    """

    def __init__(self):
        # Root directory of the ImageNet validation set.
        self.data_root = "/public/opendas/DL_DATA/ImageNet-pytorch/val"
        # Total batch size: 128 per card, sized for 8 cards (1024 overall).
        self.batch_size = 128 * 8
        # Number of data-loading workers.
        self.num_workers = 16
        # Probe CUDA once and reuse the answer for both device and pin_memory.
        cuda_available = torch.cuda.is_available()
        self.device = torch.device("cuda" if cuda_available else "cpu")
        # Report Top-1 and Top-5 accuracy.
        self.top_k = (1, 5)
        # Pinned host memory is only requested when a GPU is available.
        self.pin_memory = cuda_available
        # Number of GPUs visible to this process.
        self.num_gpus = torch.cuda.device_count()
# Build the module-level configuration object.
config = Config()
def prepare_imagenet_data(data_root, batch_size, num_workers, pin_memory):
    """Build the data loader for the ImageNet validation set.

    Args:
        data_root: Dataset directory in ``ImageFolder`` layout (one
            sub-folder per class).
        batch_size: Number of samples per batch.
        num_workers: Number of data-loading workers handed to the loader.
        pin_memory: Forwarded to the loader unchanged.

    Returns:
        A ``(val_loader, classes)`` tuple, where ``classes`` is the
        ``classes`` attribute of the underlying ``ImageFolder`` dataset.
    """
    # Validation pipeline: Resize -> CenterCrop -> ToTensor -> Normalize,
    # using the ImageNet per-channel mean and standard deviation.
    pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    dataset = torchvision.datasets.ImageFolder(root=data_root, transform=pipeline)

    loader_options = {
        "batch_size": batch_size,
        "shuffle": False,  # evaluation order does not need to be randomized
        "num_workers": num_workers,
        "pin_memory": pin_memory,
        "drop_last": False,  # keep the final, possibly smaller, batch
    }
    loader = DataLoader(dataset, **loader_options)

    return loader, dataset.classes
def load_resnet50_model(pretrained=True):
    """Load ResNet50, move it to the configured device and switch to eval mode.

    When more than one GPU is reported by the module-level ``config``, the
    model is wrapped in ``nn.DataParallel``.

    Args:
        pretrained: If true, load the pretrained ImageNet weights; otherwise
            the network is created without pretrained weights.

    Returns:
        The model (possibly wrapped in ``nn.DataParallel``) in eval mode.
    """
    models = torchvision.models
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is None:
        # Older torchvision releases only know the boolean flag.
        model = models.resnet50(pretrained=pretrained)
    else:
        # Newer torchvision deprecates `pretrained` in favour of `weights`.
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select,
        # so the reported accuracy stays comparable.
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        model = models.resnet50(weights=weights)

    # Move to the target device before any DataParallel wrapping.
    model = model.to(config.device)

    if config.num_gpus > 1:
        print(f"使用 {config.num_gpus} 个GPU进行推理")
        # With no device_ids given, DataParallel uses all visible GPUs.
        model = nn.DataParallel(model)

    model.eval()
    return model
def evaluate(model, val_loader, top_k=(1,)):
    """Run inference over ``val_loader`` and compute Top-K accuracy.

    Args:
        model: Callable that maps an image batch to per-class scores with
            the class axis at ``dim=1``.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        top_k: K values to score.

    Returns:
        A ``(acc, elapsed_time, speed)`` tuple: ``acc`` maps each K to its
        accuracy in percent, ``elapsed_time`` is the wall time in seconds and
        ``speed`` is samples per second. If the loader yields no samples,
        every accuracy and the speed are reported as ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    top_k = tuple(top_k)
    max_k = max(top_k, default=0)
    total = 0
    correct = {k: 0 for k in top_k}

    # No gradients are needed for inference.
    with torch.no_grad():
        # perf_counter is monotonic, which makes it the right clock for intervals.
        start_time = time.perf_counter()
        for images, labels in tqdm(val_loader, desc="推理中"):
            images = images.to(config.device, non_blocking=True)
            labels = labels.to(config.device, non_blocking=True)

            outputs = model(images)

            if max_k:
                # One sorted top-k at the largest K serves every smaller K:
                # the first k columns are exactly the top-k predictions.
                _, pred = torch.topk(outputs, max_k, dim=1)
                hits = pred == labels.unsqueeze(1)
                for k in top_k:
                    correct[k] += hits[:, :k].any(dim=1).sum().item()

            total += labels.size(0)
        elapsed_time = time.perf_counter() - start_time

    # Guard both divisions so an empty loader yields zeros, not a crash.
    speed = total / elapsed_time if elapsed_time > 0 else 0.0
    acc = {k: (correct[k] / total * 100 if total else 0.0) for k in top_k}
    return acc, elapsed_time, speed
def main():
    """Print the configuration, run the evaluation and report the results."""
    # torch.cuda.device_count() is 0 on a CPU-only host; clamp the divisor so
    # the CPU fallback selected by Config does not die with ZeroDivisionError.
    per_device_batch = config.batch_size // max(config.num_gpus, 1)

    # Configuration summary.
    print(f"使用设备: {config.device}")
    print(f"可用GPU数量: {config.num_gpus}")
    print(f"验证集路径: {config.data_root}")
    print(f"总批次大小: {config.batch_size}, 每个GPU批次大小: {per_device_batch}")
    print(f"线程数: {config.num_workers}")
    print(f"评估指标: Top-{config.top_k}准确率")

    # Data.
    print("\n加载验证集数据...")
    val_loader, class_names = prepare_imagenet_data(
        config.data_root,
        config.batch_size,
        config.num_workers,
        config.pin_memory,
    )
    print(f"验证集样本总数: {len(val_loader.dataset)}, 类别数: {len(class_names)}")

    # Model.
    print("\n加载ResNet50模型...")
    model = load_resnet50_model(pretrained=True)
    print("模型加载完成(预训练权重)")

    # Inference and scoring.
    print("\n开始推理...")
    acc, elapsed, speed = evaluate(model, val_loader, config.top_k)

    # Report.
    print("\n===== 推理结果 =====")
    for k in config.top_k:
        print(f"Top-{k} 准确率: {acc[k]:.2f}%")
    print(f"总耗时: {elapsed:.2f}秒")
    print(f"推理速度: {speed:.2f}样本/秒")
    print("====================")
if __name__ == "__main__":
    # Make sure the visible CUDA devices are set correctly (if specific GPUs are needed).
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7" # optional: choose which GPU ids to use
    main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment