"vscode:/vscode.git/clone" did not exist on "ef5e7ce04d71ccf63a6267b0df107ebec20cb549"
Commit 1011377c authored by qianyj

The source code of NNI (the Cream NAS pipeline) for DCU: experiment configs, training and validation loops, and model builders.

parent abc22158
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '112m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 470
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
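This is the first of eight retrain configs in this commit; the blocks that follow differ only in MODEL and NET.SELECTION. Below is a minimal sketch of how such a file is typically merged over the yacs defaults defined later in the commit; the module path lib.config and the file name 112m_retrain.yaml are assumptions, not taken from the diff.

```python
# Minimal sketch (not part of the commit): merge a retrain YAML over the
# yacs defaults. `lib.config` and the file name are assumed/hypothetical.
from lib.config import cfg

exp_cfg = cfg.clone()                          # keep the module-level defaults intact
exp_cfg.merge_from_file('112m_retrain.yaml')   # experiment values override defaults
exp_cfg.freeze()                               # guard against accidental mutation

print(exp_cfg.MODEL, exp_cfg.NET.SELECTION, exp_cfg.LR)  # '112m_retrain' 470 0.064
```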
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '14m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 470
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '23m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 470
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '287m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 470
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '43m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 43
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '481m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 481
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '604m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 604
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '72m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'random' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.2
SELECTION: 470
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9999
LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5
AUGMENTATION:
AA: 'rand-m9-mstd0.5'
RE_PROB: 0.2 # random erase prob
RE_MODE: 'pixel' # random erase mode
AUTO_RESUME: True
DATA_DIR: './data/imagenet'
MODEL: 'Childnet_Testing'
RESUME_PATH: './experiments/workspace/ckps/42.pth.tar'
SAVE_PATH: './'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 4
NUM_GPU: 2
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'bilinear' # Image resize interpolation type
BATCH_SIZE: 32 # batch size
NO_PREFECHTER: False
NET:
GP: 'avg'
DROPOUT_RATE: 0.0
SELECTION: 42
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9998
OPTIMIZER:
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-3
AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: 'Supernet_Training'
RESUME_PATH: './experiments/workspace/train/resume.pth.tar'
SAVE_PATH: './'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0
DATASET:
NUM_CLASSES: 1000
IMAGE_SIZE: 224 # image patch size
INTERPOLATION: 'bilinear' # Image resize interpolation type
BATCH_SIZE: 128 # batch size
NET:
GP: 'avg'
DROPOUT_RATE: 0.0
EMA:
USE: True
FORCE_CPU: False # force model ema to be tracked on CPU
DECAY: 0.9998
OPT: 'sgd'
LR: 1.0
EPOCHS: 120
META_LR: 1e-4
BATCHNORM:
SYNC_BN: False
SUPERNET:
UPDATE_ITER: 200
SLICE: 4
POOL_SIZE: 10
RESUNIT: False
DIL_CONV: False
UPDATE_2ND: True
FLOPS_MINIMUM: 0
FLOPS_MAXIMUM: 600
PICK_METHOD: 'meta'
META_STA_EPOCH: 20
HOW_TO_PROB: 'pre_prob'
PRE_PROB: (0.05,0.2,0.05,0.5,0.05,0.15)
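HOW_TO_PROB: 'pre_prob' together with PRE_PROB suggests a fixed prior over the candidate operations: three kernel sizes times two expansion ratios gives six choices, and the six weights sum to 1. Here is a minimal sketch under that reading; the mapping from index to (kernel, ratio) mirrors the order used in SuperNetBuilder later in the commit, but is still an assumption.

```python
# Minimal sketch: sample a (kernel_size, exp_ratio) pair with the PRE_PROB
# weights. Treating PRE_PROB as a prior over the six candidate blocks is
# inferred from the config, not stated in it.
import random

PRE_PROB = (0.05, 0.2, 0.05, 0.5, 0.05, 0.15)
choices = [(k, e) for k in (3, 5, 7) for e in (4, 6)]   # six candidate blocks

kernel_size, exp_ratio = random.choices(choices, weights=PRE_PROB, k=1)[0]
print(kernel_size, exp_ratio)
```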
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from yacs.config import CfgNode as CN
DEFAULT_CROP_PCT = 0.875
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
__C = CN()
cfg = __C
__C.AUTO_RESUME = True
__C.DATA_DIR = './data/imagenet'
__C.MODEL = 'cream'
__C.RESUME_PATH = './experiments/ckps/resume.pth.tar'
__C.SAVE_PATH = './experiments/ckps/'
__C.SEED = 42
__C.LOG_INTERVAL = 50
__C.RECOVERY_INTERVAL = 0
__C.WORKERS = 4
__C.NUM_GPU = 1
__C.SAVE_IMAGES = False
__C.AMP = False
__C.ACC_GAP = 5
__C.OUTPUT = 'output/path/'
__C.EVAL_METRICS = 'prec1'
__C.TTA = 0 # Test or inference time augmentation
__C.LOCAL_RANK = 0
__C.VERBOSE = False
# dataset configs
__C.DATASET = CN()
__C.DATASET.NUM_CLASSES = 1000
__C.DATASET.IMAGE_SIZE = 224 # image patch size
__C.DATASET.INTERPOLATION = 'bilinear' # Image resize interpolation type
__C.DATASET.BATCH_SIZE = 32 # batch size
__C.DATASET.NO_PREFECHTER = False
__C.DATASET.PIN_MEM = True
__C.DATASET.VAL_BATCH_MUL = 4
# model configs
__C.NET = CN()
__C.NET.SELECTION = 14
__C.NET.GP = 'avg' # type of global pool ["avg", "max", "avgmax", "avgmaxc"]
__C.NET.DROPOUT_RATE = 0.0 # dropout rate
__C.NET.INPUT_ARCH = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
# model ema parameters
__C.NET.EMA = CN()
__C.NET.EMA.USE = True
__C.NET.EMA.FORCE_CPU = False # force model ema to be tracked on CPU
__C.NET.EMA.DECAY = 0.9998
# optimizer configs
__C.OPT = 'sgd'
__C.OPT_EPS = 1e-2
__C.MOMENTUM = 0.9
__C.WEIGHT_DECAY = 1e-4
__C.OPTIMIZER = CN()
__C.OPTIMIZER.NAME = 'sgd'
__C.OPTIMIZER.MOMENTUM = 0.9
__C.OPTIMIZER.WEIGHT_DECAY = 1e-3
# scheduler configs
__C.SCHED = 'sgd'
__C.LR_NOISE = None
__C.LR_NOISE_PCT = 0.67
__C.LR_NOISE_STD = 1.0
__C.WARMUP_LR = 1e-4
__C.MIN_LR = 1e-5
__C.EPOCHS = 200
__C.START_EPOCH = None
__C.DECAY_EPOCHS = 30.0
__C.WARMUP_EPOCHS = 3
__C.COOLDOWN_EPOCHS = 10
__C.PATIENCE_EPOCHS = 10
__C.DECAY_RATE = 0.1
__C.LR = 1e-2
__C.META_LR = 1e-4
# data augmentation parameters
__C.AUGMENTATION = CN()
__C.AUGMENTATION.AA = 'rand-m9-mstd0.5'
__C.AUGMENTATION.COLOR_JITTER = 0.4
__C.AUGMENTATION.RE_PROB = 0.2 # random erase prob
__C.AUGMENTATION.RE_MODE = 'pixel' # random erase mode
__C.AUGMENTATION.MIXUP = 0.0 # mixup alpha
__C.AUGMENTATION.MIXUP_OFF_EPOCH = 0 # turn off mixup after this epoch
__C.AUGMENTATION.SMOOTHING = 0.1 # label smoothing parameters
# batch norm parameters (only works with gen_efficientnet based models
# currently)
__C.BATCHNORM = CN()
__C.BATCHNORM.SYNC_BN = False
__C.BATCHNORM.BN_TF = False
__C.BATCHNORM.BN_MOMENTUM = 0.1 # batchnorm momentum override
__C.BATCHNORM.BN_EPS = 1e-5 # batchnorm eps override
# supernet training hyperparameters
__C.SUPERNET = CN()
__C.SUPERNET.UPDATE_ITER = 1300
__C.SUPERNET.SLICE = 4
__C.SUPERNET.POOL_SIZE = 10
__C.SUPERNET.RESUNIT = False
__C.SUPERNET.DIL_CONV = False
__C.SUPERNET.UPDATE_2ND = True
__C.SUPERNET.FLOPS_MAXIMUM = 600
__C.SUPERNET.FLOPS_MINIMUM = 0
__C.SUPERNET.PICK_METHOD = 'meta' # pick teacher method
__C.SUPERNET.META_STA_EPOCH = 20 # start using meta picking method
__C.SUPERNET.HOW_TO_PROB = 'pre_prob' # sample method
__C.SUPERNET.PRE_PROB = (0.05, 0.2, 0.05, 0.5, 0.05,
0.15) # sample prob in 'pre_prob'
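Besides merging a YAML file, a yacs CfgNode also accepts pairwise overrides and can serialize itself back to YAML, which is how experiment files like the ones above are usually produced or patched from the command line. A minimal sketch, with the lib.config import path assumed:

```python
# Minimal sketch: override individual defaults without a YAML file and dump
# the result back to YAML text (merge_from_list and dump are standard yacs
# CfgNode methods; the lib.config module path is an assumption).
from lib.config import cfg

run_cfg = cfg.clone()
run_cfg.merge_from_list(['NUM_GPU', 1, 'DATASET.BATCH_SIZE', 64, 'LR', 0.032])
print(run_cfg.DATASET.BATCH_SIZE)   # 64
print(run_cfg.dump())               # YAML text mirroring the defaults above
```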
import os
import time
import torch
import torchvision
from collections import OrderedDict
from lib.utils.util import AverageMeter, accuracy, reduce_tensor
def train_epoch(
epoch, model, loader, optimizer, loss_fn, cfg,
lr_scheduler=None, saver=None, output_dir='', use_amp=False,
model_ema=None, logger=None, writer=None, local_rank=0):
batch_time_m = AverageMeter()
data_time_m = AverageMeter()
losses_m = AverageMeter()
prec1_m = AverageMeter()
prec5_m = AverageMeter()
model.train()
end = time.time()
last_idx = len(loader) - 1
num_updates = epoch * len(loader)
optimizer.zero_grad()
for batch_idx, (input, target) in enumerate(loader):
last_batch = batch_idx == last_idx
data_time_m.update(time.time() - end)
input = input.cuda()
target = target.cuda()
output = model(input)
loss = loss_fn(output, target)
prec1, prec5 = accuracy(output, target, topk=(1, 5))
if cfg.NUM_GPU > 1:
reduced_loss = reduce_tensor(loss.data, cfg.NUM_GPU)
prec1 = reduce_tensor(prec1, cfg.NUM_GPU)
prec5 = reduce_tensor(prec5, cfg.NUM_GPU)
else:
reduced_loss = loss.data
optimizer.zero_grad()
loss.backward()
optimizer.step()
torch.cuda.synchronize()
losses_m.update(reduced_loss.item(), input.size(0))
prec1_m.update(prec1.item(), output.size(0))
prec5_m.update(prec5.item(), output.size(0))
if model_ema is not None:
model_ema.update(model)
num_updates += 1
batch_time_m.update(time.time() - end)
if last_batch or batch_idx % cfg.LOG_INTERVAL == 0:
lrl = [param_group['lr'] for param_group in optimizer.param_groups]
lr = sum(lrl) / len(lrl)
if local_rank == 0:
logger.info(
'Train: {} [{:>4d}/{}] '
'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
'Prec@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) '
'Prec@5: {top5.val:>7.4f} ({top5.avg:>7.4f}) '
'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '
'({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
'LR: {lr:.3e} '
'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format(
epoch,
batch_idx,
len(loader),
loss=losses_m,
top1=prec1_m,
top5=prec5_m,
batch_time=batch_time_m,
rate=input.size(0) *
cfg.NUM_GPU /
batch_time_m.val,
rate_avg=input.size(0) *
cfg.NUM_GPU /
batch_time_m.avg,
lr=lr,
data_time=data_time_m))
writer.add_scalar(
'Loss/train',
losses_m.avg,
epoch *
len(loader) +
batch_idx)
writer.add_scalar(
'Accuracy/train',
prec1_m.avg,
epoch *
len(loader) +
batch_idx)
writer.add_scalar(
'Learning_Rate',
optimizer.param_groups[0]['lr'],
epoch * len(loader) + batch_idx)
if cfg.SAVE_IMAGES and output_dir:
torchvision.utils.save_image(
input, os.path.join(
output_dir, 'train-batch-%d.jpg' %
batch_idx), padding=0, normalize=True)
if saver is not None and cfg.RECOVERY_INTERVAL and (
last_batch or (batch_idx + 1) % cfg.RECOVERY_INTERVAL == 0):
saver.save_recovery(
model,
optimizer,
cfg,
epoch,
model_ema=model_ema,
use_amp=use_amp,
batch_idx=batch_idx)
if lr_scheduler is not None:
lr_scheduler.step_update(
num_updates=num_updates,
metric=losses_m.avg)
end = time.time()
# end for
if hasattr(optimizer, 'sync_lookahead'):
optimizer.sync_lookahead()
return OrderedDict([('loss', losses_m.avg)])
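train_epoch above only needs a CUDA model, a loader, an optimizer, a loss and the cfg node; everything else is optional. Below is a minimal single-GPU sketch of driving it; the random dataset, the tiny model and the module paths (lib.config, lib.core.train) are stand-ins, not taken from the commit.

```python
# Minimal single-GPU sketch of calling train_epoch (assumes CUDA plus the
# tensorboard package; the dataset/model below are throwaway placeholders).
import logging
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

from lib.config import cfg                # assumed module path
from lib.core.train import train_epoch    # assumed module path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('train')
writer = SummaryWriter('./runs/debug')

data = TensorDataset(torch.randn(256, 3, 224, 224),
                     torch.randint(0, 1000, (256,)))
loader = DataLoader(data, batch_size=32, shuffle=True)

model = nn.Sequential(nn.Conv2d(3, 8, 3, stride=2, padding=1),
                      nn.AdaptiveAvgPool2d(1), nn.Flatten(),
                      nn.Linear(8, 1000)).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=cfg.LR, momentum=cfg.MOMENTUM)
loss_fn = nn.CrossEntropyLoss().cuda()

metrics = train_epoch(0, model, loader, optimizer, loss_fn, cfg,
                      logger=logger, writer=writer)
print(metrics['loss'])
```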
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import time
import torch
from collections import OrderedDict
from lib.utils.util import AverageMeter, accuracy, reduce_tensor
def validate(epoch, model, loader, loss_fn, cfg, log_suffix='', logger=None, writer=None, local_rank=0):
batch_time_m = AverageMeter()
losses_m = AverageMeter()
prec1_m = AverageMeter()
prec5_m = AverageMeter()
model.eval()
end = time.time()
last_idx = len(loader) - 1
with torch.no_grad():
for batch_idx, (input, target) in enumerate(loader):
last_batch = batch_idx == last_idx
# move the batch to GPU to mirror train_epoch (no-op if the prefetcher already returns CUDA tensors)
input = input.cuda()
target = target.cuda()
output = model(input)
if isinstance(output, (tuple, list)):
output = output[0]
# augmentation reduction
reduce_factor = cfg.TTA
if reduce_factor > 1:
output = output.unfold(
0,
reduce_factor,
reduce_factor).mean(
dim=2)
target = target[0:target.size(0):reduce_factor]
loss = loss_fn(output, target)
prec1, prec5 = accuracy(output, target, topk=(1, 5))
if cfg.NUM_GPU > 1:
reduced_loss = reduce_tensor(loss.data, cfg.NUM_GPU)
prec1 = reduce_tensor(prec1, cfg.NUM_GPU)
prec5 = reduce_tensor(prec5, cfg.NUM_GPU)
else:
reduced_loss = loss.data
torch.cuda.synchronize()
losses_m.update(reduced_loss.item(), input.size(0))
prec1_m.update(prec1.item(), output.size(0))
prec5_m.update(prec5.item(), output.size(0))
batch_time_m.update(time.time() - end)
end = time.time()
if local_rank == 0 and (last_batch or batch_idx % cfg.LOG_INTERVAL == 0):
log_name = 'Test' + log_suffix
logger.info(
'{0}: [{1:>4d}/{2}] '
'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
'Prec@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) '
'Prec@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format(
log_name, batch_idx, last_idx,
batch_time=batch_time_m, loss=losses_m,
top1=prec1_m, top5=prec5_m))
writer.add_scalar(
'Loss' + log_suffix + '/valid',
losses_m.avg,
epoch * len(loader) + batch_idx)
writer.add_scalar(
'Accuracy' +
log_suffix +
'/valid',
prec1_m.avg,
epoch *
len(loader) +
batch_idx)
metrics = OrderedDict(
[('loss', losses_m.avg), ('prec1', prec1_m.avg), ('prec5', prec5_m.avg)])
return metrics
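One detail of validate worth spelling out is the TTA branch: with cfg.TTA = r it assumes the loader yields r consecutive augmented views per image, averages their logits with unfold/mean, and keeps every r-th label. A small sketch of just that tensor manipulation, under that layout assumption:

```python
# Minimal sketch of the TTA reduction in validate() above: each group of
# r consecutive rows of `output` is treated as r views of one image.
import torch

r = 4                                           # cfg.TTA
output = torch.randn(8 * r, 1000)               # 8 images x 4 augmented views
target = torch.arange(8).repeat_interleave(r)   # labels repeated per view

output = output.unfold(0, r, r).mean(dim=2)     # -> shape (8, 1000)
target = target[0:target.size(0):r]             # -> shape (8,)
print(output.shape, target.shape)
```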
from lib.models.blocks.residual_block import get_Bottleneck, get_BasicBlock
from lib.models.blocks.inverted_residual_block import InvertedResidual
# This file is downloaded from
# https://github.com/rwightman/pytorch-image-models
import torch.nn as nn
from timm.models.layers import create_conv2d
from timm.models.efficientnet_blocks import make_divisible, resolve_se_args, \
SqueezeExcite, drop_path
class InvertedResidual(nn.Module):
""" Inverted residual block w/ optional SE and CondConv routing"""
def __init__(
self,
in_chs,
out_chs,
dw_kernel_size=3,
stride=1,
dilation=1,
pad_type='',
act_layer=nn.ReLU,
noskip=False,
exp_ratio=1.0,
exp_kernel_size=1,
pw_kernel_size=1,
se_ratio=0.,
se_kwargs=None,
norm_layer=nn.BatchNorm2d,
norm_kwargs=None,
conv_kwargs=None,
drop_path_rate=0.):
super(InvertedResidual, self).__init__()
norm_kwargs = norm_kwargs or {}
conv_kwargs = conv_kwargs or {}
mid_chs = make_divisible(in_chs * exp_ratio)
has_se = se_ratio is not None and se_ratio > 0.
self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
self.drop_path_rate = drop_path_rate
# Point-wise expansion
self.conv_pw = create_conv2d(
in_chs,
mid_chs,
exp_kernel_size,
padding=pad_type,
**conv_kwargs)
self.bn1 = norm_layer(mid_chs, **norm_kwargs)
self.act1 = act_layer(inplace=True)
# Depth-wise convolution
self.conv_dw = create_conv2d(
mid_chs, mid_chs, dw_kernel_size, stride=stride, dilation=dilation,
padding=pad_type, depthwise=True, **conv_kwargs)
self.bn2 = norm_layer(mid_chs, **norm_kwargs)
self.act2 = act_layer(inplace=True)
# Squeeze-and-excitation
if has_se:
se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
else:
self.se = None
# Point-wise linear projection
self.conv_pwl = create_conv2d(
mid_chs,
out_chs,
pw_kernel_size,
padding=pad_type,
**conv_kwargs)
self.bn3 = norm_layer(out_chs, **norm_kwargs)
def feature_info(self, location):
if location == 'expansion': # after SE, input to PWL
info = dict(
module='conv_pwl',
hook_type='forward_pre',
num_chs=self.conv_pwl.in_channels)
else: # location == 'bottleneck', block output
info = dict(
module='',
hook_type='',
num_chs=self.conv_pwl.out_channels)
return info
def forward(self, x):
residual = x
# Point-wise expansion
x = self.conv_pw(x)
x = self.bn1(x)
x = self.act1(x)
# Depth-wise convolution
x = self.conv_dw(x)
x = self.bn2(x)
x = self.act2(x)
# Squeeze-and-excitation
if self.se is not None:
x = self.se(x)
# Point-wise linear projection
x = self.conv_pwl(x)
x = self.bn3(x)
if self.has_residual:
if self.drop_path_rate > 0.:
x = drop_path(x, self.drop_path_rate, self.training)
x += residual
return x
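A quick shape check for the InvertedResidual block above; with in_chs == out_chs and stride 1 the skip connection is active. This assumes the same (older) timm version the file imports from, i.e. one that still provides resolve_se_args.

```python
# Minimal sketch: run a dummy tensor through the InvertedResidual defined
# above and confirm the residual path preserves channels and resolution.
import torch
import torch.nn as nn

block = InvertedResidual(in_chs=16, out_chs=16, dw_kernel_size=3, stride=1,
                         exp_ratio=4.0, se_ratio=0.25, act_layer=nn.ReLU)
x = torch.randn(2, 16, 56, 56)
print(block(x).shape)        # torch.Size([2, 16, 56, 56])
print(block.has_residual)    # True: in_chs == out_chs and stride == 1
```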
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import torch
import torch.nn as nn
import torch.nn.functional as F
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=True)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
def __init__(self, inplanes, planes, stride=1, expansion=4):
super(Bottleneck, self).__init__()
planes = int(planes / expansion)
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes,
planes * expansion,
kernel_size=1,
bias=True)
self.bn3 = nn.BatchNorm2d(planes * expansion)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
self.expansion = expansion
if inplanes != planes * self.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(inplanes, planes * self.expansion,
kernel_size=1, stride=stride, bias=True),
nn.BatchNorm2d(planes * self.expansion),
)
else:
self.downsample = None
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
def get_Bottleneck(in_c, out_c, stride):
return Bottleneck(in_c, out_c, stride=stride)
def get_BasicBlock(in_c, out_c, stride):
return BasicBlock(in_c, out_c, stride=stride)
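The two helpers above return ready-to-use ResNet-style blocks. Note that get_BasicBlock never builds a downsample branch, so it is only shape-safe when in_c == out_c and stride == 1; get_Bottleneck adds a 1x1 projection automatically. A small sketch:

```python
# Minimal sketch: Bottleneck uses expansion=4 internally, so
# get_Bottleneck(in_c, out_c, stride) maps in_c -> out_c channels and adds a
# 1x1 downsample when shapes differ. BasicBlock has no downsample here, so it
# is used with matching channels and stride 1.
import torch

bottleneck = get_Bottleneck(64, 128, stride=2)
basic = get_BasicBlock(32, 32, stride=1)

print(bottleneck(torch.randn(1, 64, 56, 56)).shape)   # torch.Size([1, 128, 28, 28])
print(basic(torch.randn(1, 32, 56, 56)).shape)        # torch.Size([1, 32, 56, 56])
```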
from lib.utils.util import *
from timm.models.efficientnet_blocks import *
class ChildNetBuilder:
def __init__(
self,
channel_multiplier=1.0,
channel_divisor=8,
channel_min=None,
output_stride=32,
pad_type='',
act_layer=None,
se_kwargs=None,
norm_layer=nn.BatchNorm2d,
norm_kwargs=None,
drop_path_rate=0.,
feature_location='',
verbose=False,
logger=None):
self.channel_multiplier = channel_multiplier
self.channel_divisor = channel_divisor
self.channel_min = channel_min
self.output_stride = output_stride
self.pad_type = pad_type
self.act_layer = act_layer
self.se_kwargs = se_kwargs
self.norm_layer = norm_layer
self.norm_kwargs = norm_kwargs
self.drop_path_rate = drop_path_rate
self.feature_location = feature_location
assert feature_location in ('pre_pwl', 'post_exp', '')
self.verbose = verbose
self.in_chs = None
self.features = OrderedDict()
self.logger = logger
def _round_channels(self, chs):
return round_channels(
chs,
self.channel_multiplier,
self.channel_divisor,
self.channel_min)
def _make_block(self, ba, block_idx, block_count):
drop_path_rate = self.drop_path_rate * block_idx / block_count
bt = ba.pop('block_type')
ba['in_chs'] = self.in_chs
ba['out_chs'] = self._round_channels(ba['out_chs'])
if 'fake_in_chs' in ba and ba['fake_in_chs']:
ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
ba['norm_layer'] = self.norm_layer
ba['norm_kwargs'] = self.norm_kwargs
ba['pad_type'] = self.pad_type
# block act fn overrides the model default
ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
assert ba['act_layer'] is not None
if bt == 'ir':
ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs
if self.verbose:
self.logger.info(
' InvertedResidual {}, Args: {}'.format(
block_idx, str(ba)))
block = InvertedResidual(**ba)
elif bt == 'ds' or bt == 'dsa':
ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs
if self.verbose:
self.logger.info(
' DepthwiseSeparable {}, Args: {}'.format(
block_idx, str(ba)))
block = DepthwiseSeparableConv(**ba)
elif bt == 'cn':
if self.verbose:
self.logger.info(
' ConvBnAct {}, Args: {}'.format(
block_idx, str(ba)))
block = ConvBnAct(**ba)
else:
assert False, 'Unknown block type (%s) while building model.' % bt
self.in_chs = ba['out_chs'] # update in_chs for arg of next block
return block
def __call__(self, in_chs, model_block_args):
""" Build the blocks
Args:
in_chs: Number of input-channels passed to first block
model_block_args: A list of lists, outer list defines stages, inner
list contains strings defining block configuration(s)
Return:
List of block stacks (each stack wrapped in nn.Sequential)
"""
if self.verbose:
self.logger.info(
'Building model trunk with %d stages...' %
len(model_block_args))
self.in_chs = in_chs
total_block_count = sum([len(x) for x in model_block_args])
total_block_idx = 0
current_stride = 2
current_dilation = 1
feature_idx = 0
stages = []
# outer list of block_args defines the stacks ('stages' by some
# conventions)
for stage_idx, stage_block_args in enumerate(model_block_args):
last_stack = stage_idx == (len(model_block_args) - 1)
if self.verbose:
self.logger.info('Stack: {}'.format(stage_idx))
assert isinstance(stage_block_args, list)
blocks = []
# each stack (stage) contains a list of block arguments
for block_idx, block_args in enumerate(stage_block_args):
last_block = block_idx == (len(stage_block_args) - 1)
extract_features = '' # No features extracted
if self.verbose:
self.logger.info(' Block: {}'.format(block_idx))
# Sort out stride, dilation, and feature extraction details
assert block_args['stride'] in (1, 2)
if block_idx >= 1:
# only the first block in any stack can have a stride > 1
block_args['stride'] = 1
do_extract = False
if self.feature_location == 'pre_pwl':
if last_block:
next_stage_idx = stage_idx + 1
if next_stage_idx >= len(model_block_args):
do_extract = True
else:
do_extract = model_block_args[next_stage_idx][0]['stride'] > 1
elif self.feature_location == 'post_exp':
if block_args['stride'] > 1 or (last_stack and last_block):
do_extract = True
if do_extract:
extract_features = self.feature_location
next_dilation = current_dilation
if block_args['stride'] > 1:
next_output_stride = current_stride * block_args['stride']
if next_output_stride > self.output_stride:
next_dilation = current_dilation * block_args['stride']
block_args['stride'] = 1
if self.verbose:
self.logger.info(
' Converting stride to dilation to maintain output_stride=={}'.format(
self.output_stride))
else:
current_stride = next_output_stride
block_args['dilation'] = current_dilation
if next_dilation != current_dilation:
current_dilation = next_dilation
# create the block
block = self._make_block(
block_args, total_block_idx, total_block_count)
blocks.append(block)
# stash feature module name and channel info for model feature
# extraction
if extract_features:
feature_module = block.feature_module(extract_features)
if feature_module:
feature_module = 'blocks.{}.{}.'.format(
stage_idx, block_idx) + feature_module
feature_channels = block.feature_channels(extract_features)
self.features[feature_idx] = dict(
name=feature_module,
num_chs=feature_channels
)
feature_idx += 1
# incr global block idx (across all stacks)
total_block_idx += 1
stages.append(nn.Sequential(*blocks))
return stages
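ChildNetBuilder consumes per-block argument dicts, normally produced by timm's decode_arch_def from strings such as 'ir_r1_k3_s2_e4_c24_se0.25' (the same notation gen_childnet rewrites later in this commit). A minimal sketch, assuming an older timm that exposes decode_arch_def in timm.models.efficientnet_builder (matching the other timm imports in the commit) and a hypothetical three-stage arch_def:

```python
# Minimal sketch: decode a small, made-up arch_def and let ChildNetBuilder
# (defined above) turn it into a list of nn.Sequential stages.
import torch
import torch.nn as nn
from timm.models.efficientnet_builder import decode_arch_def  # older timm API

arch_def = [
    ['ds_r1_k3_s1_e1_c16_se0.25'],   # stage 0: one depthwise-separable block
    ['ir_r2_k3_s2_e4_c24_se0.25'],   # stage 1: two inverted residual blocks
    ['ir_r2_k5_s2_e6_c40_se0.25'],   # stage 2
]
builder = ChildNetBuilder(act_layer=nn.ReLU)
stages = builder(16, decode_arch_def(arch_def))   # 16 = stem output channels

trunk = nn.Sequential(*stages)
x = torch.randn(1, 16, 112, 112)   # as if produced by a stride-2 stem
print(trunk(x).shape)              # torch.Size([1, 40, 28, 28])
```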
from copy import deepcopy
from lib.utils.builder_util import modify_block_args
from lib.models.blocks import get_Bottleneck, InvertedResidual
from timm.models.efficientnet_blocks import *
from nni.nas.pytorch import mutables
class SuperNetBuilder:
""" Build Trunk Blocks
"""
def __init__(
self,
choices,
channel_multiplier=1.0,
channel_divisor=8,
channel_min=None,
output_stride=32,
pad_type='',
act_layer=None,
se_kwargs=None,
norm_layer=nn.BatchNorm2d,
norm_kwargs=None,
drop_path_rate=0.,
feature_location='',
verbose=False,
resunit=False,
dil_conv=False,
logger=None):
# dict
# choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
self.choices = [[x, y] for x in choices['kernel_size']
for y in choices['exp_ratio']]
self.choices_num = len(self.choices) - 1
self.channel_multiplier = channel_multiplier
self.channel_divisor = channel_divisor
self.channel_min = channel_min
self.output_stride = output_stride
self.pad_type = pad_type
self.act_layer = act_layer
self.se_kwargs = se_kwargs
self.norm_layer = norm_layer
self.norm_kwargs = norm_kwargs
self.drop_path_rate = drop_path_rate
self.feature_location = feature_location
assert feature_location in ('pre_pwl', 'post_exp', '')
self.verbose = verbose
self.resunit = resunit
self.dil_conv = dil_conv
self.logger = logger
# state updated during build, consumed by model
self.in_chs = None
def _round_channels(self, chs):
return round_channels(
chs,
self.channel_multiplier,
self.channel_divisor,
self.channel_min)
def _make_block(
self,
ba,
choice_idx,
block_idx,
block_count,
resunit=False,
dil_conv=False):
drop_path_rate = self.drop_path_rate * block_idx / block_count
bt = ba.pop('block_type')
ba['in_chs'] = self.in_chs
ba['out_chs'] = self._round_channels(ba['out_chs'])
if 'fake_in_chs' in ba and ba['fake_in_chs']:
# FIXME this is a hack to work around mismatch in origin impl input
# filters
ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
ba['norm_layer'] = self.norm_layer
ba['norm_kwargs'] = self.norm_kwargs
ba['pad_type'] = self.pad_type
# block act fn overrides the model default
ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
assert ba['act_layer'] is not None
if bt == 'ir':
ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs
if self.verbose:
self.logger.info(
' InvertedResidual {}, Args: {}'.format(
block_idx, str(ba)))
block = InvertedResidual(**ba)
elif bt == 'ds' or bt == 'dsa':
ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs
if self.verbose:
self.logger.info(
' DepthwiseSeparable {}, Args: {}'.format(
block_idx, str(ba)))
block = DepthwiseSeparableConv(**ba)
elif bt == 'cn':
if self.verbose:
self.logger.info(
' ConvBnAct {}, Args: {}'.format(
block_idx, str(ba)))
block = ConvBnAct(**ba)
else:
assert False, 'Unknown block type (%s) while building model.' % bt
if choice_idx == self.choice_num - 1:
self.in_chs = ba['out_chs'] # update in_chs for arg of next block
return block
def __call__(self, in_chs, model_block_args):
""" Build the blocks
Args:
in_chs: Number of input-channels passed to first block
model_block_args: A list of lists, outer list defines stages, inner
list contains strings defining block configuration(s)
Return:
List of block stacks (each stack wrapped in nn.Sequential)
"""
if self.verbose:
self.logger.info('Building model trunk with %d stages...' % len(model_block_args))
self.in_chs = in_chs
total_block_count = sum([len(x) for x in model_block_args])
total_block_idx = 0
current_stride = 2
current_dilation = 1
feature_idx = 0
stages = []
# outer list of block_args defines the stacks ('stages' by some conventions)
for stage_idx, stage_block_args in enumerate(model_block_args):
last_stack = stage_idx == (len(model_block_args) - 1)
if self.verbose:
self.logger.info('Stack: {}'.format(stage_idx))
assert isinstance(stage_block_args, list)
# blocks = []
# each stack (stage) contains a list of block arguments
for block_idx, block_args in enumerate(stage_block_args):
last_block = block_idx == (len(stage_block_args) - 1)
if self.verbose:
self.logger.info(' Block: {}'.format(block_idx))
# Sort out stride, dilation, and feature extraction details
assert block_args['stride'] in (1, 2)
if block_idx >= 1:
# only the first block in any stack can have a stride > 1
block_args['stride'] = 1
next_dilation = current_dilation
if block_args['stride'] > 1:
next_output_stride = current_stride * block_args['stride']
if next_output_stride > self.output_stride:
next_dilation = current_dilation * block_args['stride']
block_args['stride'] = 1
else:
current_stride = next_output_stride
block_args['dilation'] = current_dilation
if next_dilation != current_dilation:
current_dilation = next_dilation
if stage_idx==0 or stage_idx==6:
self.choice_num = 1
else:
self.choice_num = len(self.choices)
if self.dil_conv:
self.choice_num += 2
choice_blocks = []
block_args_copy = deepcopy(block_args)
if self.choice_num == 1:
# create the block
block = self._make_block(block_args, 0, total_block_idx, total_block_count)
choice_blocks.append(block)
else:
for choice_idx, choice in enumerate(self.choices):
# create the block
block_args = deepcopy(block_args_copy)
block_args = modify_block_args(block_args, choice[0], choice[1])
block = self._make_block(block_args, choice_idx, total_block_idx, total_block_count)
choice_blocks.append(block)
if self.dil_conv:
block_args = deepcopy(block_args_copy)
block_args = modify_block_args(block_args, 3, 0)
block = self._make_block(block_args, self.choice_num - 2, total_block_idx, total_block_count,
resunit=self.resunit, dil_conv=self.dil_conv)
choice_blocks.append(block)
block_args = deepcopy(block_args_copy)
block_args = modify_block_args(block_args, 5, 0)
block = self._make_block(block_args, self.choice_num - 1, total_block_idx, total_block_count,
resunit=self.resunit, dil_conv=self.dil_conv)
choice_blocks.append(block)
if self.resunit:
block = get_Bottleneck(block.conv_pw.in_channels,
block.conv_pwl.out_channels,
block.conv_dw.stride[0])
choice_blocks.append(block)
choice_block = mutables.LayerChoice(choice_blocks)
stages.append(choice_block)
# create the block
# block = self._make_block(block_args, total_block_idx, total_block_count)
total_block_idx += 1 # incr global block idx (across all stacks)
# stages.append(blocks)
return stages
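The net effect of the loop above is that every searchable position in the trunk becomes an NNI LayerChoice over one block per (kernel_size, exp_ratio) pair, plus two dilated variants when DIL_CONV is set and an optional bottleneck when RESUNIT is set; the first and last stages keep a single fixed block. The candidate enumeration itself is tiny:

```python
# Minimal sketch: the candidate list built in SuperNetBuilder.__init__ above.
# Each searchable layer wraps one block per pair in mutables.LayerChoice so an
# NNI mutator can pick which one is executed.
choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
pairs = [[k, e] for k in choices['kernel_size'] for e in choices['exp_ratio']]
print(pairs)       # [[3, 4], [3, 6], [5, 4], [5, 6], [7, 4], [7, 6]]
print(len(pairs))  # 6; +2 dilated variants when DIL_CONV is enabled
```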
from lib.utils.builder_util import *
from lib.models.builders.build_childnet import *
from timm.models.layers import SelectAdaptivePool2d
from timm.models.layers.activations import hard_sigmoid
class ChildNet(nn.Module):
def __init__(
self,
block_args,
num_classes=1000,
in_chans=3,
stem_size=16,
num_features=1280,
head_bias=True,
channel_multiplier=1.0,
pad_type='',
act_layer=nn.ReLU,
drop_rate=0.,
drop_path_rate=0.,
se_kwargs=None,
norm_layer=nn.BatchNorm2d,
norm_kwargs=None,
global_pool='avg',
logger=None,
verbose=False):
super(ChildNet, self).__init__()
self.num_classes = num_classes
self.num_features = num_features
self.drop_rate = drop_rate
self._in_chs = in_chans
self.logger = logger
# Stem
stem_size = round_channels(stem_size, channel_multiplier)
self.conv_stem = create_conv2d(
self._in_chs, stem_size, 3, stride=2, padding=pad_type)
self.bn1 = norm_layer(stem_size, **norm_kwargs)
self.act1 = act_layer(inplace=True)
self._in_chs = stem_size
# Middle stages (IR/ER/DS Blocks)
builder = ChildNetBuilder(
channel_multiplier, 8, None, 32, pad_type, act_layer, se_kwargs,
norm_layer, norm_kwargs, drop_path_rate, verbose=verbose)
self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
# self.blocks = builder(self._in_chs, block_args)
self._in_chs = builder.in_chs
# Head + Pooling
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.conv_head = create_conv2d(
self._in_chs,
self.num_features,
1,
padding=pad_type,
bias=head_bias)
self.act2 = act_layer(inplace=True)
# Classifier
self.classifier = nn.Linear(
self.num_features *
self.global_pool.feat_mult(),
self.num_classes)
efficientnet_init_weights(self)
def get_classifier(self):
return self.classifier
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(),
num_classes) if self.num_classes else None
def forward_features(self, x):
# architecture = [[0], [], [], [], [], [0]]
x = self.conv_stem(x)
x = self.bn1(x)
x = self.act1(x)
x = self.blocks(x)
x = self.global_pool(x)
x = self.conv_head(x)
x = self.act2(x)
return x
def forward(self, x):
x = self.forward_features(x)
x = x.flatten(1)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
x = self.classifier(x)
return x
def gen_childnet(arch_list, arch_def, **kwargs):
# arch_list = [[0], [], [], [], [], [0]]
choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
choices_list = [[x, y] for x in choices['kernel_size']
for y in choices['exp_ratio']]
num_features = 1280
# act_layer = HardSwish
act_layer = Swish
new_arch = []
# change to child arch_def
for i, (layer_choice, layer_arch) in enumerate(zip(arch_list, arch_def)):
if len(layer_arch) == 1:
new_arch.append(layer_arch)
continue
else:
new_layer = []
for j, (block_choice, block_arch) in enumerate(
zip(layer_choice, layer_arch)):
kernel_size, exp_ratio = choices_list[block_choice]
elements = block_arch.split('_')
block_arch = block_arch.replace(
elements[2], 'k{}'.format(str(kernel_size)))
block_arch = block_arch.replace(
elements[4], 'e{}'.format(str(exp_ratio)))
new_layer.append(block_arch)
new_arch.append(new_layer)
model_kwargs = dict(
block_args=decode_arch_def(new_arch),
num_features=num_features,
stem_size=16,
norm_kwargs=resolve_bn_args(kwargs),
act_layer=act_layer,
se_kwargs=dict(
act_layer=nn.ReLU,
gate_fn=hard_sigmoid,
reduce_mid=True,
divisor=8),
**kwargs,
)
model = ChildNet(**model_kwargs)
return model
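The per-block rewrite inside gen_childnet is easiest to see on one string: the third and fifth '_'-separated fields carry the kernel size and expansion ratio, and the chosen (kernel, ratio) pair is patched in. A worked example with a hypothetical block string:

```python
# Minimal worked example of the string rewrite in gen_childnet above:
# choice index -> (kernel_size, exp_ratio), then patch the 'k...' and 'e...'
# fields (indices 2 and 4 after splitting on '_').
choices_list = [[k, e] for k in (3, 5, 7) for e in (4, 6)]

block_arch = 'ir_r1_k3_s1_e4_c24_se0.25'   # hypothetical block definition
block_choice = 3                           # -> kernel_size=5, exp_ratio=6
kernel_size, exp_ratio = choices_list[block_choice]

elements = block_arch.split('_')
block_arch = block_arch.replace(elements[2], 'k{}'.format(kernel_size))
block_arch = block_arch.replace(elements[4], 'e{}'.format(exp_ratio))
print(block_arch)                          # 'ir_r1_k5_s1_e6_c24_se0.25'
```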