Commit 68bc58a9 authored by chenzk

v1.0
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310
ENV DEBIAN_FRONTEND=noninteractive
# RUN yum update && yum install -y git cmake wget build-essential
# NOTE: `source` in a RUN step only affects that single layer; the DTK
# environment must be sourced again at container runtime for later commands to see it.
RUN source /opt/dtk-24.04/env.sh
# Install pip dependencies
COPY requirements.txt requirements.txt
RUN pip3 install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt
# torch==2.1.0
# torchvision==0.16.0
# Model code
modelCode=632
# Model name
modelName=mobilenetv4_pytorch
# Model description
modelDescription=MobileNetV4, the king of lightweight models: 3.8 ms inference on a phone, with high performance across mobile CPUs, DSPs, and GPUs as well as Apple M-series processors and the Google Pixel Edge TPU.
# Application scenarios
appScenario=inference,training,manufacturing,e-commerce,healthcare,energy,education
# Framework type
frameType=pytorch
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time : 2024/5/5 10:56
# @Author : 'IReverser'
# @FileName: model_config.py
MNV4ConvSmall_Block_Specs = {
"conv0": {
"block_name": "convbn",
"num_blocks": 1,
"block_specs": [
[3, 32, 3, 2]
],
},
"layer1": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[32, 32, 3, 2],
[32, 32, 1, 1],
]
},
"layer2": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[32, 96, 3, 2],
[96, 64, 1, 1]
]
},
"layer3": {
"block_name": "uib",
"num_blocks": 6,
"block_specs": [
[64, 96, 5, 5, True, 2, 3],
[96, 96, 0, 3, True, 1, 2],
[96, 96, 0, 3, True, 1, 2],
[96, 96, 0, 3, True, 1, 2],
[96, 96, 0, 3, True, 1, 2],
[96, 96, 3, 0, True, 1, 4],
]
},
"layer4": {
"block_name": "uib",
"num_blocks": 6,
"block_specs": [
[96, 128, 3, 3, True, 2, 6],
[128, 128, 5, 5, True, 1, 4],
[128, 128, 0, 5, True, 1, 4],
[128, 128, 0, 5, True, 1, 3],
[128, 128, 0, 3, True, 1, 4],
[128, 128, 0, 3, True, 1, 4],
]
},
"layer5": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[128, 960, 1, 1],
[960, 1280, 1, 1],
]
}
}
MNV4ConvMedium_Block_Specs = {
"conv0": {
"block_name": "convbn",
"num_blocks": 1,
"block_specs": [
[3, 32, 3, 2]
]
},
"layer1": {
"block_name": "fused_ib",
"num_blocks": 1,
"block_specs": [
[32, 48, 2, 4.0, True],
]
},
"layer2": {
"block_name": "uib",
"num_blocks": 2,
"block_specs": [
[48, 80, 3, 5, True, 2, 4],
[80, 80, 3, 3, True, 1, 2],
]
},
"layer3": {
"block_name": "uib",
"num_blocks": 8,
"block_specs": [
[80, 160, 3, 5, True, 2, 6],
[160, 160, 3, 3, True, 1, 4],
[160, 160, 3, 3, True, 1, 4],
[160, 160, 3, 3, True, 1, 4],
[160, 160, 3, 3, True, 1, 4],
[160, 160, 3, 0, True, 1, 4],
[160, 160, 0, 0, True, 1, 2],
[160, 160, 3, 0, True, 1, 4],
]
},
"layer4": {
"block_name": "uib",
"num_blocks": 11,
"block_specs": [
[160, 256, 5, 5, True, 2, 6],
[256, 256, 5, 5, True, 1, 4],
[256, 256, 3, 5, True, 1, 4],
[256, 256, 3, 5, True, 1, 4],
[256, 256, 0, 0, True, 1, 4],
[256, 256, 3, 0, True, 1, 4],
[256, 256, 3, 5, True, 1, 2],
[256, 256, 5, 5, True, 1, 4],
[256, 256, 0, 0, True, 1, 4],
[256, 256, 0, 0, True, 1, 4],
[256, 256, 5, 0, True, 1, 2],
]
},
"layer5": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[256, 960, 1, 1],
[960, 1280, 1, 1],
]
}
}
MNV4ConvLarge_Block_Specs = {
"conv0": {
"block_name": "convbn",
"num_blocks": 1,
"block_specs": [
[3, 24, 3, 2],
]
},
"layer1": {
"block_name": "fused_ib",
"num_blocks": 1,
"block_specs": [
[24, 48, 2, 4.0, True],
]
},
"layer2": {
"block_name": "uib",
"num_blocks": 2,
"block_specs": [
[48, 96, 3, 5, True, 2, 4],
[96, 96, 3, 3, True, 1, 4],
]
},
"layer3": {
"block_name": "uib",
"num_blocks": 11,
"block_specs": [
[96, 192, 3, 5, True, 2, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 5, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 3, 0, True, 1, 4],
]
},
"layer4": {
"block_name": "uib",
"num_blocks": 13,
"block_specs": [
[192, 512, 5, 5, True, 2, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 3, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 3, True, 1, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
]
},
"layer5": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[512, 960, 1, 1],
[960, 1280, 1, 1],
]
}
}
def mhsa(num_heads, key_dim, value_dim, px):
    """Build the MHSA parameter list appended to a uib spec row; `px` is the
    feature-map size (in pixels) at which the block runs."""
    if px == 24:
        kv_strides = 2
    elif px == 12:
        kv_strides = 1
    else:
        raise ValueError("Unsupported feature-map size px={}".format(px))
    query_h_strides = 1
    query_w_strides = 1
    use_layer_scale = True
    use_multi_query = True
    use_residual = True
return [
num_heads, key_dim, value_dim, query_h_strides, query_w_strides, kv_strides,
use_layer_scale, use_multi_query, use_residual
]
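# Example: mhsa(4, 64, 64, 24) returns
#   [4, 64, 64, 1, 1, 2, True, True, True]
# (num_heads, key_dim, value_dim, query_h_strides, query_w_strides,
#  kv_strides, use_layer_scale, use_multi_query, use_residual).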
MNV4HybridConvMedium_Block_Specs = {
"conv0": {
"block_name": "convbn",
"num_blocks": 1,
"block_specs": [
[3, 32, 3, 2],
]
},
"layer1": {
"block_name": "fused_ib",
"num_blocks": 1,
"block_specs": [
[32, 48, 2, 4.0, True],
]
},
"layer2": {
"block_name": "uib",
"num_blocks": 2,
"block_specs": [
[48, 80, 3, 5, True, 2, 4],
[80, 80, 3, 3, True, 1, 2],
]
},
"layer3": {
"block_name": "uib",
"num_blocks": 8,
"block_specs": [
[80, 160, 3, 5, True, 2, 6],
[160, 160, 0, 0, True, 1, 2],
[160, 160, 3, 3, True, 1, 4],
[160, 160, 3, 5, True, 1, 4, mhsa(4, 64, 64, 24)],
[160, 160, 3, 3, True, 1, 4, mhsa(4, 64, 64, 24)],
[160, 160, 3, 0, True, 1, 4, mhsa(4, 64, 64, 24)],
[160, 160, 3, 3, True, 1, 4, mhsa(4, 64, 64, 24)],
[160, 160, 3, 0, True, 1, 4],
]
},
"layer4": {
"block_name": "uib",
"num_blocks": 12,
"block_specs": [
[160, 256, 5, 5, True, 2, 6],
[256, 256, 5, 5, True, 1, 4],
[256, 256, 3, 5, True, 1, 4],
[256, 256, 3, 5, True, 1, 4],
[256, 256, 0, 0, True, 1, 2],
[256, 256, 3, 5, True, 1, 2],
[256, 256, 0, 0, True, 1, 2],
[256, 256, 0, 0, True, 1, 4, mhsa(4, 64, 64, 12)],
[256, 256, 3, 0, True, 1, 4, mhsa(4, 64, 64, 12)],
[256, 256, 5, 5, True, 1, 4, mhsa(4, 64, 64, 12)],
[256, 256, 5, 0, True, 1, 4, mhsa(4, 64, 64, 12)],
[256, 256, 5, 0, True, 1, 4],
]
},
"layer5": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[256, 960, 1, 1],
[960, 1280, 1, 1],
]
}
}
MNV4HybridConvLarge_Block_Specs = {
"conv0": {
"block_name": "convbn",
"num_blocks": 1,
"block_specs": [
            [3, 24, 3, 2],  # in_channels, out_channels, kernel_size, stride
]
},
"layer1": {
"block_name": "fused_ib",
"num_blocks": 1,
"block_specs": [
[24, 48, 2, 4.0, True],
]
},
"layer2": {
"block_name": "uib",
"num_blocks": 2,
"block_specs": [
[48, 96, 3, 5, True, 2, 4],
[96, 96, 3, 3, True, 1, 4],
]
},
"layer3": {
"block_name": "uib",
"num_blocks": 11,
"block_specs": [
            # in_channels, out_channels, start_dw_kernel_size, middle_dw_kernel_size, middle_dw_downsample, stride, expand_ratio, mhsa
[96, 192, 3, 5, True, 2, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 3, True, 1, 4],
[192, 192, 3, 5, True, 1, 4],
[192, 192, 5, 3, True, 1, 4],
[192, 192, 5, 3, True, 1, 4, mhsa(8, 48, 48, 24)],
[192, 192, 5, 3, True, 1, 4, mhsa(8, 48, 48, 24)],
[192, 192, 5, 3, True, 1, 4, mhsa(8, 48, 48, 24)],
[192, 192, 5, 3, True, 1, 4, mhsa(8, 48, 48, 24)],
[192, 192, 3, 0, True, 1, 4],
]
},
"layer4": {
"block_name": "uib",
"num_blocks": 14,
"block_specs": [
[192, 512, 5, 5, True, 2, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 5, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 3, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 0, True, 1, 4],
[512, 512, 5, 3, True, 1, 4],
[512, 512, 5, 5, True, 1, 4, mhsa(8, 64, 64, 12)],
[512, 512, 5, 0, True, 1, 4, mhsa(8, 64, 64, 12)],
[512, 512, 5, 0, True, 1, 4, mhsa(8, 64, 64, 12)],
[512, 512, 5, 0, True, 1, 4, mhsa(8, 64, 64, 12)],
[512, 512, 5, 0, True, 1, 4],
]
},
"layer5": {
"block_name": "convbn",
"num_blocks": 2,
"block_specs": [
[512, 960, 1, 1],
[960, 1280, 1, 1],
]
}
}
MODEL_SPECS = {
"MNV4ConvSmall": MNV4ConvSmall_Block_Specs,
"MNV4ConvMedium": MNV4ConvMedium_Block_Specs,
"MNV4ConvLarge": MNV4ConvLarge_Block_Specs,
"MNV4HybridMedium": MNV4HybirdConvMedium_Block_Specs,
"MNV4HybridLarge": MNV4HybirdConvLarge_Block_Specs
}
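
# Illustrative only: a minimal sketch of how a "convbn" spec row
# ([in_channels, out_channels, kernel_size, stride]) could be materialized.
# The actual builder (including "uib", "fused_ib", and MHSA handling) lives in
# MobileNetv4.py; the ReLU6 activation and "same" padding here are assumptions.
import torch.nn as nn

def _demo_build_convbn(spec):
    in_ch, out_ch, kernel_size, stride = spec
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size, stride=stride,
                  padding=kernel_size // 2, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU6(inplace=True),
    )

if __name__ == "__main__":
    # Stem of MNV4ConvSmall: 3 -> 32 channels, 3x3 kernel, stride 2.
    print(_demo_build_convbn(MODEL_SPECS["MNV4ConvSmall"]["conv0"]["block_specs"][0]))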
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time : 2024/3/24 10:25
# @Author : 'IReverser'
# @FileName: predict.py
# Reference:
import os
import json
import glob
import torch
from PIL import Image
from torchvision import transforms
from MobileNetv4 import create_mobilenetv4
import time
MODEL_NAME = 'MNV4ConvSmall'
dataset_name = 'flowers'
MODEL_PATH = './checkpoints/model_MNV4ConvSmall_seed901_best.pt'
CLASS_NUM = 5
TEST_PATH = './results/'
assert os.path.exists(TEST_PATH), "path: '{}' does not exist.".format(TEST_PATH)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Use device: ', device)
# pre-processing
transform = transforms.Compose([transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
# read class_dictionary
json_path = './classes_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
with open(json_path, "r") as json_file:
    classes_indict = json.load(json_file)
print(classes_indict)
# create model
net = create_mobilenetv4(model_name=MODEL_NAME, num_classes=CLASS_NUM)
net = net.to(device)
# load model weights
assert os.path.exists(MODEL_PATH), "file: '{}' does not exist.".format(MODEL_PATH)
net.load_state_dict(torch.load(MODEL_PATH, map_location=device)["state_dict"])
for im_path in glob.glob(TEST_PATH + '*.jpg'):
# load data
    im = Image.open(im_path).convert('RGB')  # ensure 3 channels for normalization
im = transform(im) # [H, W, C] -> [C, H, W]
im = torch.unsqueeze(im, dim=0) # [C, H, W] -> [N, C, H, W]
im = im.to(device)
net.eval()
with torch.no_grad():
# start_time = time.time()
output = net(im)
# print("infer time:", time.time() - start_time, "s")
# confidence = torch.max(output, dim=1)[0].cpu().data.numpy()[0] # option
confidence = torch.max(torch.softmax(output, dim=1)).cpu().data.numpy()
predict = torch.max(output, dim=1)[1].cpu().data.numpy()
    print('Verification picture:', im_path.split('/')[-1], '\t',
          'Recognition result:', classes_indict[str(int(predict))], '\t',
          'Recognition confidence:', str(confidence))
# MobileNetV4
MobileNetV4 - Universal Models for the Mobile Ecosystem
paper: [http://arxiv.org/abs/2404.10518](http://arxiv.org/abs/2404.10518)
official code (TensorFlow): [https://github.com/tensorflow/models/blob/master/official/vision/modeling/backbones/mobilenet.py](https://github.com/tensorflow/models/blob/master/official/vision/modeling/backbones/mobilenet.py)
# Usage
1. Install timm==0.3.2
```Shell
pip install timm==0.3.2
```
2. Train or eval
# Train
Train a model from the MobileNetV4 series (MNV4ConvSmall, MNV4ConvMedium, MNV4ConvLarge, MNV4HybridMedium, MNV4HybridLarge):
```Shell
# train without cache
python train.py --gpu 0
python train.py --gpu 0 --arch 'MNV4ConvSmall' --batch_size 16  # or 'MNV4ConvMedium', 'MNV4ConvLarge', 'MNV4HybridMedium', 'MNV4HybridLarge'
# resume training from a checkpoint
python train.py --arch MNV4ConvSmall --resume checkpoints/model_MNV4ConvSmall_seed561_best.pt --gpu 0
```
# Validate
```Shell
# validate
python train.py --evaluate --gpu 1 --resume checkpoints/model_MNV4ConvSmall_seed561_best.pt
```
# Predict
Modify ```MODEL_NAME```, ```dataset_name```, ```MODEL_PATH``` and ```CLASS_NUM``` in the ```predict.py``` script, and put pictures into the ```results``` directory.
```Shell
python predict.py
```
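# Dataset layout
```train.py``` builds datasets with ```torchvision.datasets.ImageFolder```, so it expects the layout below under ```--data_path``` (the class subdirectory names here are hypothetical; they define the labels):
```Shell
datasets/flowers/
    train/
        class_a/*.jpg
        class_b/*.jpg
    val/
        class_a/*.jpg
        class_b/*.jpg
```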
# torch==2.1.0
# torchvision==0.16.0
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time : 2024/3/24 10:26
# @Author : 'IReverser'
# @FileName: train.py
# Reference:
import argparse
import os
import numpy as np
import time
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data.distributed
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
from utils.misc import make_dir, AverageMeter, Logger, accuracy, format_time
from MobileNetv4 import create_mobilenetv4
from torchvision import datasets, transforms
try:
from apex import amp
except ImportError:
amp = None
def parse_option():
    parser = argparse.ArgumentParser("PyTorch Training for MobileNetV4")
    parser.add_argument('--data_path', default="./datasets/flowers", type=str, help='path to dataset')
    parser.add_argument('--data', default="flowers", type=str, help='dataset name (used in log and checkpoint names)')
    parser.add_argument('--arch', metavar='ARCH', default='MNV4ConvSmall', type=str,
                        help='MNV4ConvSmall, MNV4ConvMedium, MNV4ConvLarge, MNV4HybridMedium, MNV4HybridLarge')
    parser.add_argument('--input_size', default=224, type=int, help='input image size')
parser.add_argument('--batch_size', type=int, default=128,
help='input batch size for training and eval(default: 128)')
parser.add_argument('--num_classes', default=5, type=int, help='num_classes')
    parser.add_argument('--epochs', default=300, type=int, metavar='N', help='number of total epochs to train')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restart)')
    parser.add_argument('-b', '--batch-size', default=64, type=int, metavar='N',
                        help='mini-batch size (default: 64), this is the total '
                             'batch size of all GPUs on the current node when '
                             'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate',
dest='lr')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W',
help='weight decay (default: 1e-4)', dest='weight_decay')
    parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                        help='number of data loading workers (default: 8)')
parser.add_argument('--model-path', default=' ', type=str, help='loading pretraining model')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: None)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--world-size', default=-1, type=int, help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int, help='node rank for distributed training')
    parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                        help='URL used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
parser.add_argument('--seed', default=0, type=int, help='seed for initializing training')
    parser.add_argument('--gpu', default=0, type=int, help='GPU id to use')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
parser.add_argument('--suffix', default='', type=str, help='path suffix')
return parser.parse_args()
def data_transform(train=True):
if train:
transform = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
else:
transform = transforms.Compose([
transforms.Resize(256), # resize from smallest edge
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
return transform
def build_dataset(train, args):
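    # ImageFolder layout: <data_path>/train/<class_name>/*.jpg and
    # <data_path>/val/<class_name>/*.jpg; subdirectory names define the labels.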
root = os.path.join(args.data_path, 'train' if train else 'val')
dataset = datasets.ImageFolder(root, transform=data_transform(train=train))
num_classes = args.num_classes
return dataset, num_classes
def train(train_loader, model, criterion, optimizer, args, lr_scheduler):
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
# switch to train mode
model.train()
for i, (images, target) in enumerate(train_loader):
if args.gpu is not None:
images = images.cuda(args.gpu, non_blocking=True)
if torch.cuda.is_available():
#target = torch.tensor(F.one_hot(target, args.num_classes), dtype=torch.float32)
target = target.cuda(args.gpu, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# compute gradient and do SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()
lr_scheduler.step()
return losses.avg, top1.avg, top5.avg
def validate(val_loader, model, criterion, args):
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
# switch to evaluate mode
model.eval()
with torch.no_grad():
for i, (images, target) in enumerate(val_loader):
if args.gpu is not None:
images = images.cuda(args.gpu, non_blocking=True)
if torch.cuda.is_available():
target = target.cuda(args.gpu, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
return losses.avg, top1.avg, top5.avg
def main_worker(gpu, ngpus_per_node, log, args):
best_acc1 = 0
st_time = time.time()
args.gpu = gpu
if args.gpu is not None:
print("Use GPU: {} for training".format(args.gpu))
if args.distributed:
if args.dist_url == "env://" and args.rank == -1:
args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the global rank among all the processes
args.rank = args.rank * ngpus_per_node + gpu
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
# data loader
train_dataset, args.num_classes = build_dataset(train=True, args=args)
val_dataset, _ = build_dataset(train=False, args=args)
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
else:
train_sampler = None
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
shuffle=(train_sampler is None),
num_workers=args.workers, pin_memory=True, sampler=train_sampler,
drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True, drop_last=False)
print(len(train_loader), len(val_loader))
# load model or teacher
model = create_mobilenetv4(model_name=args.arch, num_classes=args.num_classes)
if not torch.cuda.is_available():
print("Using CPU, this will be slow")
elif args.distributed:
# For multiprocessing distributed, DistributedDataParallel constructor
# should always set the single device scope, otherwise,
# DistributedDataParallel will use all available devices.
if args.gpu is not None:
torch.cuda.set_device(args.gpu)
model.cuda(args.gpu)
# When using a single GPU per process and per
# DistributedDataParallel, we need to divide the batch size
# ourselves based on the total number of GPUs we have
args.batch_size = int(args.batch_size / ngpus_per_node)
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
else:
model.cuda()
# DistributedDataParallel will divide and allocate batch_size to all available GPUs if device_ids are not set
model = torch.nn.parallel.DistributedDataParallel(model)
elif args.gpu is not None:
torch.cuda.set_device(args.gpu)
model = model.cuda(args.gpu)
else:
        # DataParallel will divide and allocate batch_size to all available GPUs
model = torch.nn.DataParallel(model).cuda()
# load pretrain model
if args.model_path != " ":
print("loading pretrained model from " + args.model_path)
model.load_state_dict(torch.load(args.model_path))
    model_n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("params: {:.3f} M".format(model_n_parameters / 1e6))  # millions of trainable parameters
# define optimizer
criterion = nn.CrossEntropyLoss().cuda(args.gpu)
optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, betas=(0.9, 0.999), weight_decay=args.weight_decay)
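    # CosineAnnealingLR is stepped once per batch in train(), so T_max is the
    # total number of batches over all epochs; IMAGENET_TRAINSET_SIZE is
    # defined in the __main__ block below.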
lr_scheduler = CosineAnnealingLR(optimizer=optimizer,
T_max=args.epochs * IMAGENET_TRAINSET_SIZE // args.batch_size // ngpus_per_node)
# optionally resume from a checkpoint
if args.resume:
if os.path.isfile(args.resume):
print("=> loading checkpoint '{}'".format(args.resume))
if args.gpu is None:
checkpoint = torch.load(args.resume)
else:
# map model to be loaded to specified single gpu
loc = "cuda:{}".format(args.gpu)
checkpoint = torch.load(args.resume, map_location=loc)
args.start_epoch = checkpoint["epoch"]
best_acc1 = checkpoint["best_acc1"]
if args.gpu is not None:
# best_acc1 may be from a checkpoint from a different GPU
best_acc1 = best_acc1.to(args.gpu)
model.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
lr_scheduler.load_state_dict(checkpoint["scheduler"])
print("=> loaded chekpoint '{}' (epoch: {})".format(args.resume, checkpoint["epoch"]))
else:
print("=> no checkpoint found at '{}'".format(args.resume))
cudnn.benchmark = True
model.eval()
# eval only
if args.evaluate:
val_loss, val_acc1, val_acc5 = validate(val_loader, model, criterion, args)
row = {"Test_Loss": "%.5f" % val_loss, "Test_Acc@1": "%.3f" % val_acc1, "Test_Acc@5": "%.3f" % val_acc5}
log.writerow(row)
print(row)
return
# train and eval
print("=> Start training...")
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
# adjust_learning_rate(optimizer, epoch, args)
# train for one epoch
train_loss, train_acc1, train_acc5 = train(train_loader, model, criterion, optimizer, args, lr_scheduler)
# eval
val_loss, val_acc1, val_acc5 = validate(val_loader, model, criterion, args)
row = {"Epoch": str(epoch),
"Train_Loss": "%.5f" % train_loss, "Train_Acc@1": "%.3f" % train_acc1,
"Train_Acc@5": "%.3f" % train_acc5,
"Test_Loss": "%.5f" % val_loss, "Test_Acc@1": "%.3f" % val_acc1, "Test_Acc@5": "%.3f" % val_acc5,
"Test_best_Acc@1": "%.3f" % best_acc1, "lr": "%.5f" % optimizer.param_groups[0]['lr']}
row.update({
'time': format_time(time.time() - st_time),
'eta': format_time((time.time() - st_time) / (epoch + 1) * (args.epochs - epoch - 1)),
})
print(row)
log.writerow(row)
if best_acc1 < val_acc1:
best_acc1 = val_acc1
if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
make_dir('./checkpoints/')
torch.save({'epoch': epoch + 1,
'arch': args.arch,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
'scheduler': lr_scheduler.state_dict()},
'./checkpoints/model_{}_seed{}_best.pt'.format(args.arch, str(args.seed)))
else:
pass
print("=> Training Finish!")
def main():
args = parse_option()
if args.seed is not None:
if args.seed == 0:
args.seed = np.random.randint(1000)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
torch.cuda.manual_seed(args.seed)
cudnn.deterministic = True
args.model_name = '{}_{}_{}_seed{}'.format(args.arch, args.data, args.suffix, str(args.seed))
make_dir('./logs/')
if args.evaluate:
log = Logger(args=args, filename='./logs/' + args.model_name + "_test.txt")
print("==>Result save to ", './logs/' + args.model_name + "_test.txt")
else:
log = Logger(args=args, filename='./logs/' + args.model_name + ".txt")
print("==>Result save to ", './logs/' + args.model_name + ".txt")
print(args)
if args.dist_url == 'env://' and args.world_size == -1:
args.world_size = int(os.environ["WORLD_SIZE"])
args.distributed = args.world_size > 1 or args.multiprocessing_distributed
ngpus_per_node = torch.cuda.device_count()
if args.multiprocessing_distributed:
# Since we have ngpus_per_node processes per node, the total world_size
# needs to be adjusted accordingly
args.world_size = ngpus_per_node * args.world_size
# Use torch.multiprocessing.spawn to launch distributed processes: the
# main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, log, args))
else:
# Simply call main_worker function
main_worker(args.gpu, ngpus_per_node, log, args)
if __name__ == '__main__':
    IMAGENET_TRAINSET_SIZE = 3520  # number of training images; used to size the LR schedule
main()
python train.py --data_path "./datasets/flowers" --num_classes 5 --input_size 256 --gpu 0