"examples/trials/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "c6b1e513e0a9448d2f000d214643a9e18c360547"
Commit b40e3db7 authored by quzha's avatar quzha
Browse files

Merge branch 'master' of github.com:Microsoft/nni into dev-retiarii

parents efa4e31c 95f731e4
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import torch.nn as nn
import torch.nn.functional as F
from lib.utils.builder_util import *
from lib.utils.search_structure_supernet import *
from lib.models.builders.build_supernet import *
from lib.utils.op_by_layer_dict import flops_op_dict
from timm.models.layers import SelectAdaptivePool2d
from timm.models.layers.activations import hard_sigmoid, Swish
class SuperNet(nn.Module):
def __init__(
self,
block_args,
choices,
num_classes=1000,
in_chans=3,
stem_size=16,
num_features=1280,
head_bias=True,
channel_multiplier=1.0,
pad_type='',
act_layer=nn.ReLU,
drop_rate=0.,
drop_path_rate=0.,
slice=4,
se_kwargs=None,
norm_layer=nn.BatchNorm2d,
logger=None,
norm_kwargs=None,
global_pool='avg',
resunit=False,
dil_conv=False,
verbose=False):
super(SuperNet, self).__init__()
self.num_classes = num_classes
self.num_features = num_features
self.drop_rate = drop_rate
self._in_chs = in_chans
self.logger = logger
# Stem
stem_size = round_channels(stem_size, channel_multiplier)
self.conv_stem = create_conv2d(
self._in_chs, stem_size, 3, stride=2, padding=pad_type)
self.bn1 = norm_layer(stem_size, **norm_kwargs)
self.act1 = act_layer(inplace=True)
self._in_chs = stem_size
# Middle stages (IR/ER/DS Blocks)
builder = SuperNetBuilder(
choices,
channel_multiplier,
8,
None,
32,
pad_type,
act_layer,
se_kwargs,
norm_layer,
norm_kwargs,
drop_path_rate,
verbose=verbose,
resunit=resunit,
dil_conv=dil_conv,
logger=self.logger)
blocks = builder(self._in_chs, block_args)
self.blocks = nn.Sequential(*blocks)
self._in_chs = builder.in_chs
# Head + Pooling
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.conv_head = create_conv2d(
self._in_chs,
self.num_features,
1,
padding=pad_type,
bias=head_bias)
self.act2 = act_layer(inplace=True)
# Classifier
self.classifier = nn.Linear(
self.num_features *
self.global_pool.feat_mult(),
self.num_classes)
self.meta_layer = nn.Linear(self.num_classes * slice, 1)
efficientnet_init_weights(self)
def get_classifier(self):
return self.classifier
def reset_classifier(self, num_classes, global_pool='avg'):
self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
self.num_classes = num_classes
self.classifier = nn.Linear(
self.num_features * self.global_pool.feat_mult(),
num_classes) if self.num_classes else None
def forward_features(self, x):
x = self.conv_stem(x)
x = self.bn1(x)
x = self.act1(x)
x = self.blocks(x)
x = self.global_pool(x)
x = self.conv_head(x)
x = self.act2(x)
return x
def forward(self, x):
x = self.forward_features(x)
x = x.flatten(1)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
return self.classifier(x)
def forward_meta(self, features):
return self.meta_layer(features.view(1, -1))
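# The generator below yields only the weights of a sampled path: with meta=False it
# returns every non-block, non-meta parameter plus, for each searchable layer, the
# parameters of the block index chosen in `architecture` (-1 skips a layer); with
# meta=True it yields only the meta_layer parameters.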
def rand_parameters(self, architecture, meta=False):
for name, param in self.named_parameters(recurse=True):
if 'meta' in name and meta:
yield param
elif 'blocks' not in name and 'meta' not in name and (not meta):
yield param
if not meta:
for layer, layer_arch in zip(self.blocks, architecture):
for blocks, arch in zip(layer, layer_arch):
if arch == -1:
continue
for name, param in blocks[arch].named_parameters(
recurse=True):
yield param
class Classifier(nn.Module):
def __init__(self, num_classes=1000):
super(Classifier, self).__init__()
self.classifier = nn.Linear(num_classes, num_classes)
def forward(self, x):
return self.classifier(x)
def gen_supernet(flops_minimum=0, flops_maximum=600, **kwargs):
choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
num_features = 1280
# act_layer = HardSwish
act_layer = Swish
arch_def = [
# stage 0, 112x112 in
['ds_r1_k3_s1_e1_c16_se0.25'],
# stage 1, 112x112 in
['ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
'ir_r1_k3_s1_e4_c24_se0.25'],
# stage 2, 56x56 in
['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
'ir_r1_k5_s2_e4_c40_se0.25'],
# stage 3, 28x28 in
['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
'ir_r2_k3_s1_e4_c80_se0.25'],
# stage 4, 14x14in
['ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
'ir_r1_k3_s1_e6_c96_se0.25'],
# stage 5, 14x14in
['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25',
'ir_r1_k5_s2_e6_c192_se0.25'],
# stage 6, 7x7 in
['cn_r1_k1_s1_c320_se0.25'],
]
sta_num, arch_def, resolution = search_for_layer(
flops_op_dict, arch_def, flops_minimum, flops_maximum)
if sta_num is None or arch_def is None or resolution is None:
raise ValueError('Invalid FLOPs Settings')
model_kwargs = dict(
block_args=decode_arch_def(arch_def),
choices=choices,
num_features=num_features,
stem_size=16,
norm_kwargs=resolve_bn_args(kwargs),
act_layer=act_layer,
se_kwargs=dict(
act_layer=nn.ReLU,
gate_fn=hard_sigmoid,
reduce_mid=True,
divisor=8),
**kwargs,
)
model = SuperNet(**model_kwargs)
return model, sta_num, resolution
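# Illustrative usage (not part of the original file): build a supernet whose searchable
# stages fit a FLOPs budget given in MFLOPs.
#   model, sta_num, resolution = gen_supernet(
#       flops_minimum=0, flops_maximum=600, num_classes=1000, drop_rate=0.0)
#   # sta_num: number of searchable blocks per stage; resolution: input image size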
import re
import math
from copy import deepcopy
import torch.nn as nn
from timm.utils import *
from timm.models.layers.activations import Swish
from timm.models.layers import CondConv2d, get_condconv_initializer
def parse_ksize(ss):
if ss.isdigit():
return int(ss)
else:
return [int(k) for k in ss.split('.')]
def decode_arch_def(
arch_def,
depth_multiplier=1.0,
depth_trunc='ceil',
experts_multiplier=1):
arch_args = []
for stack_idx, block_strings in enumerate(arch_def):
assert isinstance(block_strings, list)
stack_args = []
repeats = []
for block_str in block_strings:
assert isinstance(block_str, str)
ba, rep = decode_block_str(block_str)
if ba.get('num_experts', 0) > 0 and experts_multiplier > 1:
ba['num_experts'] *= experts_multiplier
stack_args.append(ba)
repeats.append(rep)
arch_args.append(
scale_stage_depth(
stack_args,
repeats,
depth_multiplier,
depth_trunc))
return arch_args
def modify_block_args(block_args, kernel_size, exp_ratio):
block_type = block_args['block_type']
if block_type == 'cn':
block_args['kernel_size'] = kernel_size
elif block_type == 'er':
block_args['exp_kernel_size'] = kernel_size
else:
block_args['dw_kernel_size'] = kernel_size
if block_type == 'ir' or block_type == 'er':
block_args['exp_ratio'] = exp_ratio
return block_args
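# Worked example (illustrative): for an 'ir' block args dict, sampling kernel size 5
# and expansion ratio 6 rewrites the searchable fields in place:
#   ba, _ = decode_block_str('ir_r1_k3_s2_e4_c24_se0.25')
#   modify_block_args(ba, 5, 6)   # ba['dw_kernel_size'] == 5, ba['exp_ratio'] == 6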
def decode_block_str(block_str):
""" Decode block definition string
Gets a dict of block args and a repeat count from a string notation of arguments.
E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip
All args can exist in any order with the exception of the leading string which
is assumed to indicate the block type.
leading string - block type (
ir = InvertedResidual, ds = DepthwiseSep, dsa = DepthwiseSep with pw act, cn = ConvBnAct)
r - number of repeat blocks,
k - kernel size,
s - strides (1-9),
e - expansion ratio,
c - output channels,
se - squeeze/excitation ratio
n - activation fn ('re', 'r6', 'hs', or 'sw')
Args:
block_str: a string representation of block arguments.
Returns:
A tuple of (block args dict, number of repeats for the block)
Raises:
ValueError: if the string definition is not properly specified (TODO)
"""
assert isinstance(block_str, str)
ops = block_str.split('_')
block_type = ops[0] # take the block type off the front
ops = ops[1:]
options = {}
noskip = False
for op in ops:
# string options being checked on individual basis, combine if they
# grow
if op == 'noskip':
noskip = True
elif op.startswith('n'):
# activation fn
key = op[0]
v = op[1:]
if v == 're':
value = nn.ReLU
elif v == 'r6':
value = nn.ReLU6
elif v == 'sw':
value = Swish
else:
continue
options[key] = value
else:
# all numeric options
splits = re.split(r'(\d.*)', op)
if len(splits) >= 2:
key, value = splits[:2]
options[key] = value
# if act_layer is None, the model default (passed to model init) will be
# used
act_layer = options['n'] if 'n' in options else None
exp_kernel_size = parse_ksize(options['a']) if 'a' in options else 1
pw_kernel_size = parse_ksize(options['p']) if 'p' in options else 1
# FIXME hack to deal with in_chs issue in TPU def
fake_in_chs = int(options['fc']) if 'fc' in options else 0
num_repeat = int(options['r'])
# each type of block has different valid arguments, fill accordingly
if block_type == 'ir':
block_args = dict(
block_type=block_type,
dw_kernel_size=parse_ksize(options['k']),
exp_kernel_size=exp_kernel_size,
pw_kernel_size=pw_kernel_size,
out_chs=int(options['c']),
exp_ratio=float(options['e']),
se_ratio=float(options['se']) if 'se' in options else None,
stride=int(options['s']),
act_layer=act_layer,
noskip=noskip,
)
if 'cc' in options:
block_args['num_experts'] = int(options['cc'])
elif block_type == 'ds' or block_type == 'dsa':
block_args = dict(
block_type=block_type,
dw_kernel_size=parse_ksize(options['k']),
pw_kernel_size=pw_kernel_size,
out_chs=int(options['c']),
se_ratio=float(options['se']) if 'se' in options else None,
stride=int(options['s']),
act_layer=act_layer,
pw_act=block_type == 'dsa',
noskip=block_type == 'dsa' or noskip,
)
elif block_type == 'cn':
block_args = dict(
block_type=block_type,
kernel_size=int(options['k']),
out_chs=int(options['c']),
stride=int(options['s']),
act_layer=act_layer,
)
else:
assert False, 'Unknown block type (%s)' % block_type
return block_args, num_repeat
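# Worked example (illustrative):
#   decode_block_str('ir_r2_k3_s2_e4_c24_se0.25')
#   -> ({'block_type': 'ir', 'dw_kernel_size': 3, 'exp_kernel_size': 1,
#        'pw_kernel_size': 1, 'out_chs': 24, 'exp_ratio': 4.0, 'se_ratio': 0.25,
#        'stride': 2, 'act_layer': None, 'noskip': False}, 2)
# i.e. an inverted-residual block with kernel 3, stride 2, expansion 4, 24 output
# channels and SE ratio 0.25, repeated twice.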
def scale_stage_depth(
stack_args,
repeats,
depth_multiplier=1.0,
depth_trunc='ceil'):
""" Per-stage depth scaling
Scales the block repeats in each stage. This depth scaling impl maintains
compatibility with the EfficientNet scaling method, while allowing sensible
scaling for other models that may have multiple block arg definitions in each stage.
"""
# We scale the total repeat count for each stage; there may be multiple
# block arg defs per stage, so we sum them.
num_repeat = sum(repeats)
if depth_trunc == 'round':
# Truncating to int by rounding allows stages with few repeats to remain
# proportionally smaller for longer. This is a good choice when stage definitions
# include single repeat stages that we'd prefer to keep that way as
# long as possible
num_repeat_scaled = max(1, round(num_repeat * depth_multiplier))
else:
# The default for EfficientNet truncates repeats to int via 'ceil'.
# Any multiplier > 1.0 will result in an increased depth for every
# stage.
num_repeat_scaled = int(math.ceil(num_repeat * depth_multiplier))
# Proportionally distribute repeat count scaling to each block definition in the stage.
# Allocation is done in reverse as it results in the first block being less likely to be scaled.
# The first block makes less sense to repeat in most of the arch
# definitions.
repeats_scaled = []
for r in repeats[::-1]:
rs = max(1, round((r / num_repeat * num_repeat_scaled)))
repeats_scaled.append(rs)
num_repeat -= r
num_repeat_scaled -= rs
repeats_scaled = repeats_scaled[::-1]
# Apply the calculated scaling to each block arg in the stage
sa_scaled = []
for ba, rep in zip(stack_args, repeats_scaled):
sa_scaled.extend([deepcopy(ba) for _ in range(rep)])
return sa_scaled
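# Worked example (illustrative): two block defs with repeats [1, 2] and
# depth_multiplier=2.0 under 'ceil': num_repeat=3 scales to ceil(3*2.0)=6, which is
# distributed in reverse as [2, 4], so the stage expands to 2 copies of the first
# block arg followed by 4 copies of the second.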
def init_weight_goog(m, n='', fix_group_fanout=True, last_bn=None):
""" Weight initialization as per Tensorflow official implementations.
Args:
m (nn.Module): module to init
n (str): module name
fix_group_fanout (bool): enable correct (matching Tensorflow TPU impl) fanout calculation w/ group convs
Handles layers in EfficientNet, EfficientNet-CondConv, MixNet, MnasNet, MobileNetV3, etc:
* https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
* https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
"""
if isinstance(m, CondConv2d):
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
if fix_group_fanout:
fan_out //= m.groups
init_weight_fn = get_condconv_initializer(lambda w: w.data.normal_(
0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape)
init_weight_fn(m.weight)
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.Conv2d):
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
if fix_group_fanout:
fan_out //= m.groups
m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
if last_bn is not None and n in last_bn:
m.weight.data.zero_()
m.bias.data.zero_()
else:
m.weight.data.fill_(1.0)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
fan_out = m.weight.size(0) # fan-out
fan_in = 0
if 'routing_fn' in n:
fan_in = m.weight.size(1)
init_range = 1.0 / math.sqrt(fan_in + fan_out)
m.weight.data.uniform_(-init_range, init_range)
m.bias.data.zero_()
def efficientnet_init_weights(
model: nn.Module,
init_fn=None,
zero_gamma=False):
last_bn = []
if zero_gamma:
prev_n = ''
for n, m in model.named_modules():
if isinstance(m, nn.BatchNorm2d):
if ''.join(prev_n.split('.')[:-1]) != ''.join(n.split('.')[:-1]):
last_bn.append(prev_n)
prev_n = n
last_bn.append(prev_n)
init_fn = init_fn or init_weight_goog
for n, m in model.named_modules():
init_fn(m, n, last_bn=last_bn)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import torch
from ptflops import get_model_complexity_info
class FlopsEst(object):
def __init__(self, model, input_shape=(2, 3, 224, 224), device='cpu'):
self.block_num = len(model.blocks)
self.choice_num = len(model.blocks[0])
self.flops_dict = {}
self.params_dict = {}
if device == 'cpu':
model = model.cpu()
else:
model = model.cuda()
self.params_fixed = 0
self.flops_fixed = 0
input = torch.randn(input_shape)
flops, params = get_model_complexity_info(
model.conv_stem, (3, 224, 224), as_strings=False, print_per_layer_stat=False)
self.params_fixed += params / 1e6
self.flops_fixed += flops / 1e6
input = model.conv_stem(input)
for block_id, block in enumerate(model.blocks):
self.flops_dict[block_id] = {}
self.params_dict[block_id] = {}
for module_id, module in enumerate(block):
flops, params = get_model_complexity_info(module, tuple(
input.shape[1:]), as_strings=False, print_per_layer_stat=False)
# Flops(M)
self.flops_dict[block_id][module_id] = flops / 1e6
# Params(M)
self.params_dict[block_id][module_id] = params / 1e6
input = module(input)
# conv_last
flops, params = get_model_complexity_info(model.global_pool, tuple(
input.shape[1:]), as_strings=False, print_per_layer_stat=False)
self.params_fixed += params / 1e6
self.flops_fixed += flops / 1e6
input = model.global_pool(input)
# globalpool
flops, params = get_model_complexity_info(model.conv_head, tuple(
input.shape[1:]), as_strings=False, print_per_layer_stat=False)
self.params_fixed += params / 1e6
self.flops_fixed += flops / 1e6
# return params (M)
def get_params(self, arch):
params = 0
for block_id, block in enumerate(arch):
if block == -1:
continue
params += self.params_dict[block_id][block]
return params + self.params_fixed
# return flops (M)
def get_flops(self, arch):
flops = 0
for block_id, block in enumerate(arch):
# skip the first and last layer choices (compare the key, not the enumeration index)
if block == 'LayerChoice1' or block == 'LayerChoice23':
continue
for idx, choice in enumerate(arch[block]):
flops += self.flops_dict[block_id][idx] * (1 if choice else 0)
return flops + self.flops_fixed
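# Illustrative usage (not part of the original file), assuming `model` is the supernet
# returned by gen_supernet:
#   est = FlopsEst(model)
#   arch = [0] * est.block_num        # pick operation 0 in every block (-1 = skip)
#   print(est.get_params(arch))       # total params in M, including the fixed stem/head
# get_flops() instead expects the mutator's sample dict keyed by LayerChoice name.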
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
# This dictionary holds the precomputed FLOPs of each candidate operation at each stage, so layer search can look up costs quickly.
# flops_op_dict[which_stage][which_operation] =
# (flops_of_operation_with_stride1, flops_of_operation_with_stride2)
flops_op_dict = {}
for i in range(5):
flops_op_dict[i] = {}
flops_op_dict[0][0] = (21.828704, 18.820752)
flops_op_dict[0][1] = (32.669328, 28.16048)
flops_op_dict[0][2] = (25.039968, 23.637648)
flops_op_dict[0][3] = (37.486224, 35.385824)
flops_op_dict[0][4] = (29.856864, 30.862992)
flops_op_dict[0][5] = (44.711568, 46.22384)
flops_op_dict[1][0] = (11.808656, 11.86712)
flops_op_dict[1][1] = (17.68624, 17.780848)
flops_op_dict[1][2] = (13.01288, 13.87416)
flops_op_dict[1][3] = (19.492576, 20.791408)
flops_op_dict[1][4] = (14.819216, 16.88472)
flops_op_dict[1][5] = (22.20208, 25.307248)
flops_op_dict[2][0] = (8.198, 10.99632)
flops_op_dict[2][1] = (12.292848, 16.5172)
flops_op_dict[2][2] = (8.69976, 11.99984)
flops_op_dict[2][3] = (13.045488, 18.02248)
flops_op_dict[2][4] = (9.4524, 13.50512)
flops_op_dict[2][5] = (14.174448, 20.2804)
flops_op_dict[3][0] = (12.006112, 15.61632)
flops_op_dict[3][1] = (18.028752, 23.46096)
flops_op_dict[3][2] = (13.009632, 16.820544)
flops_op_dict[3][3] = (19.534032, 25.267296)
flops_op_dict[3][4] = (14.514912, 18.62688)
flops_op_dict[3][5] = (21.791952, 27.9768)
flops_op_dict[4][0] = (11.307456, 15.292416)
flops_op_dict[4][1] = (17.007072, 23.1504)
flops_op_dict[4][2] = (11.608512, 15.894528)
flops_op_dict[4][3] = (17.458656, 24.053568)
flops_op_dict[4][4] = (12.060096, 16.797696)
flops_op_dict[4][5] = (18.136032, 25.40832)
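# Example lookup (illustrative): flops_op_dict[2][4] == (9.4524, 13.50512), i.e.
# operation 4 in stage 2 costs ~9.45 MFLOPs at stride 1 and ~13.51 MFLOPs at stride 2.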
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
def search_for_layer(flops_op_dict, arch_def, flops_minimum, flops_maximum):
sta_num = [1, 1, 1, 1, 1]
order = [2, 3, 4, 1, 0, 2, 3, 4, 1, 0]
limits = [3, 3, 3, 2, 2, 4, 4, 4, 4, 4]
size_factor = 224 // 32
base_min_flops = sum([flops_op_dict[i][0][0] for i in range(5)])
base_max_flops = sum([flops_op_dict[i][5][0] for i in range(5)])
if base_min_flops > flops_maximum:
while base_min_flops > flops_maximum and size_factor >= 2:
size_factor = size_factor - 1
flops_minimum = flops_minimum * (7. / size_factor)
flops_maximum = flops_maximum * (7. / size_factor)
if size_factor < 2:
return None, None, None
elif base_max_flops < flops_minimum:
cur_ptr = 0
while base_max_flops < flops_minimum and cur_ptr <= 9:
if sta_num[order[cur_ptr]] >= limits[cur_ptr]:
cur_ptr += 1
continue
base_max_flops = base_max_flops + \
flops_op_dict[order[cur_ptr]][5][1]
sta_num[order[cur_ptr]] += 1
if cur_ptr > 7 and base_max_flops < flops_minimum:
return None, None, None
cur_ptr = 0
while cur_ptr <= 9:
if sta_num[order[cur_ptr]] >= limits[cur_ptr]:
cur_ptr += 1
continue
base_max_flops = base_max_flops + flops_op_dict[order[cur_ptr]][5][1]
if base_max_flops <= flops_maximum:
sta_num[order[cur_ptr]] += 1
else:
break
arch_def = [item[:i] for i, item in zip([1] + sta_num + [1], arch_def)]
# print(arch_def)
return sta_num, arch_def, size_factor * 32
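# Illustrative call (not part of the original file): with the precomputed flops_op_dict
# and the supernet arch_def, a 0-600 MFLOPs budget yields the per-stage block counts,
# the truncated arch_def, and the input resolution (a multiple of 32):
#   sta_num, arch_def, resolution = search_for_layer(flops_op_dict, arch_def, 0, 600)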
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import sys
import logging
import argparse
from copy import deepcopy
import torch
import torch.nn as nn
from torch import optim as optim
from thop import profile, clever_format
from timm.utils import *
from lib.config import cfg
def get_path_acc(model, path, val_loader, args, val_iters=50):
prec1_m = AverageMeter()
prec5_m = AverageMeter()
with torch.no_grad():
for batch_idx, (input, target) in enumerate(val_loader):
if batch_idx >= val_iters:
break
if not args.prefetcher:
input = input.cuda()
target = target.cuda()
output = model(input, path)
if isinstance(output, (tuple, list)):
output = output[0]
# augmentation reduction
reduce_factor = args.tta
if reduce_factor > 1:
output = output.unfold(
0,
reduce_factor,
reduce_factor).mean(
dim=2)
target = target[0:target.size(0):reduce_factor]
prec1, prec5 = accuracy(output, target, topk=(1, 5))
torch.cuda.synchronize()
prec1_m.update(prec1.item(), output.size(0))
prec5_m.update(prec5.item(), output.size(0))
return (prec1_m.avg, prec5_m.avg)
def get_logger(file_path):
""" Make python logger """
log_format = '%(asctime)s | %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
format=log_format, datefmt='%m/%d %I:%M:%S %p')
logger = logging.getLogger('')
formatter = logging.Formatter(log_format, datefmt='%m/%d %I:%M:%S %p')
file_handler = logging.FileHandler(file_path)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
return logger
def add_weight_decay_supernet(model, args, weight_decay=1e-5, skip_list=()):
decay = []
no_decay = []
meta_layer_no_decay = []
meta_layer_decay = []
for name, param in model.named_parameters():
if not param.requires_grad:
continue # frozen weights
if len(param.shape) == 1 or name.endswith(
".bias") or name in skip_list:
if 'meta_layer' in name:
meta_layer_no_decay.append(param)
else:
no_decay.append(param)
else:
if 'meta_layer' in name:
meta_layer_decay.append(param)
else:
decay.append(param)
return [
{'params': no_decay, 'weight_decay': 0., 'lr': args.lr},
{'params': decay, 'weight_decay': weight_decay, 'lr': args.lr},
{'params': meta_layer_no_decay, 'weight_decay': 0., 'lr': args.meta_lr},
{'params': meta_layer_decay, 'weight_decay': 0, 'lr': args.meta_lr},
]
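# The returned list is a standard torch "param groups" spec: create_optimizer_supernet()
# below passes it straight to optim.SGD/Adam, so biases and 1-d params (e.g. BN) get
# zero weight decay and the meta layer trains with its own learning rate args.meta_lr.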
def create_optimizer_supernet(args, model, has_apex, filter_bias_and_bn=True):
opt_lower = args.opt.lower()
weight_decay = args.weight_decay
if 'adamw' in opt_lower or 'radam' in opt_lower:
weight_decay /= args.lr
if weight_decay and filter_bias_and_bn:
parameters = add_weight_decay_supernet(model, args, weight_decay)
weight_decay = 0.
else:
parameters = model.parameters()
if 'fused' in opt_lower:
assert has_apex and torch.cuda.is_available(
), 'APEX and CUDA required for fused optimizers'
opt_split = opt_lower.split('_')
opt_lower = opt_split[-1]
if opt_lower == 'sgd' or opt_lower == 'nesterov':
optimizer = optim.SGD(
parameters,
momentum=args.momentum,
weight_decay=weight_decay,
nesterov=True)
elif opt_lower == 'momentum':
optimizer = optim.SGD(
parameters,
momentum=args.momentum,
weight_decay=weight_decay,
nesterov=False)
elif opt_lower == 'adam':
optimizer = optim.Adam(
parameters, weight_decay=weight_decay, eps=args.opt_eps)
else:
raise ValueError('Invalid optimizer: {}'.format(opt_lower))
return optimizer
def convert_lowercase(cfg):
keys = cfg.keys()
lowercase_keys = [key.lower() for key in keys]
values = [cfg.get(key) for key in keys]
for lowercase_key, value in zip(lowercase_keys, values):
cfg.setdefault(lowercase_key, value)
return cfg
def parse_config_args(exp_name):
parser = argparse.ArgumentParser(description=exp_name)
parser.add_argument(
'--cfg',
type=str,
default='../experiments/workspace/retrain/retrain.yaml',
help='configuration of cream')
parser.add_argument('--local_rank', type=int, default=0,
help='local_rank')
args = parser.parse_args()
cfg.merge_from_file(args.cfg)
converted_cfg = convert_lowercase(cfg)
return args, converted_cfg
def get_model_flops_params(model, input_size=(1, 3, 224, 224)):
input = torch.randn(input_size)
macs, params = profile(deepcopy(model), inputs=(input,), verbose=False)
macs, params = clever_format([macs, params], "%.3f")
return macs, params
def cross_entropy_loss_with_soft_target(pred, soft_target):
logsoftmax = nn.LogSoftmax(dim=1)  # class dimension
return torch.mean(torch.sum(- soft_target * logsoftmax(pred), 1))
def create_supernet_scheduler(cfg, optimizer):
ITERS = cfg.EPOCHS * \
(1280000 / (cfg.NUM_GPU * cfg.DATASET.BATCH_SIZE))
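# (1,280,000 above approximates the ImageNet-1k training-set size, so ITERS is
# roughly the total number of optimizer steps across all epochs.)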
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda step: (
cfg.LR - step / ITERS) if step <= ITERS else 0, last_epoch=-1)
return lr_scheduler, cfg.EPOCHS
yacs
numpy==1.17
opencv-python==4.0.1.24
torchvision==0.2.1
thop
git+https://github.com/sovrasov/flops-counter.pytorch.git
pillow==6.1.0
torch==1.2
timm==0.1.20
tensorboardx==1.2
tensorboard
future
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import os
import warnings
import datetime
import torch
import numpy as np
import torch.nn as nn
from torchscope import scope
from torch.utils.tensorboard import SummaryWriter
# import timm packages
from timm.optim import create_optimizer
from timm.models import resume_checkpoint
from timm.scheduler import create_scheduler
from timm.data import Dataset, create_loader
from timm.utils import ModelEma, update_summary
from timm.loss import LabelSmoothingCrossEntropy
# import apex as distributed package
try:
from apex import amp
from apex.parallel import DistributedDataParallel as DDP
from apex.parallel import convert_syncbn_model
HAS_APEX = True
except ImportError:
from torch.nn.parallel import DistributedDataParallel as DDP
HAS_APEX = False
# import models and training functions
from lib.core.test import validate
from lib.core.retrain import train_epoch
from lib.models.structures.childnet import gen_childnet
from lib.utils.util import parse_config_args, get_logger, get_model_flops_params
from lib.config import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
def main():
args, cfg = parse_config_args('nni.cream.childnet')
# resolve logging
output_dir = os.path.join(cfg.SAVE_PATH,
"{}-{}".format(datetime.date.today().strftime('%m%d'),
cfg.MODEL))
if not os.path.exists(output_dir):
os.mkdir(output_dir)
if args.local_rank == 0:
logger = get_logger(os.path.join(output_dir, 'retrain.log'))
writer = SummaryWriter(os.path.join(output_dir, 'runs'))
else:
writer, logger = None, None
# retrain model selection
if cfg.NET.SELECTION == 481:
arch_list = [
[0], [
3, 4, 3, 1], [
3, 2, 3, 0], [
3, 3, 3, 1], [
3, 3, 3, 3], [
3, 3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
elif cfg.NET.SELECTION == 43:
arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 96
elif cfg.NET.SELECTION == 14:
arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
cfg.DATASET.IMAGE_SIZE = 64
elif cfg.NET.SELECTION == 112:
arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 160
elif cfg.NET.SELECTION == 287:
arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
elif cfg.NET.SELECTION == 604:
arch_list = [
[0], [
3, 3, 2, 3, 3], [
3, 2, 3, 2, 3], [
3, 2, 3, 2, 3], [
3, 3, 2, 2, 3, 3], [
3, 3, 2, 3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
elif cfg.NET.SELECTION == -1:
arch_list = cfg.NET.INPUT_ARCH
cfg.DATASET.IMAGE_SIZE = 224
else:
raise ValueError("Model Retrain Selection is not Supported!")
# define childnet architecture from arch_list
stem = ['ds_r1_k3_s1_e1_c16_se0.25', 'cn_r1_k1_s1_c320_se0.25']
choice_block_pool = ['ir_r1_k3_s2_e4_c24_se0.25',
'ir_r1_k5_s2_e4_c40_se0.25',
'ir_r1_k3_s2_e6_c80_se0.25',
'ir_r1_k3_s1_e6_c96_se0.25',
'ir_r1_k3_s2_e6_c192_se0.25']
arch_def = [[stem[0]]] + [[choice_block_pool[idx]
for repeat_times in range(len(arch_list[idx + 1]))]
for idx in range(len(choice_block_pool))] + [[stem[1]]]
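# Illustrative expansion: for SELECTION == 43, arch_list[1:-1] has lengths
# [1, 2, 2, 3, 2], so arch_def repeats each stage's choice block that many times,
# e.g. arch_def[4] becomes ['ir_r1_k3_s1_e6_c96_se0.25'] * 3, bracketed by the two
# fixed stem/tail definitions.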
# generate childnet
model = gen_childnet(
arch_list,
arch_def,
num_classes=cfg.DATASET.NUM_CLASSES,
drop_rate=cfg.NET.DROPOUT_RATE,
global_pool=cfg.NET.GP)
# initialize training parameters
eval_metric = cfg.EVAL_METRICS
best_metric, best_epoch, saver = None, None, None
# initialize distributed parameters
distributed = cfg.NUM_GPU > 1
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')
if args.local_rank == 0:
logger.info(
'Training on Process {} with {} GPUs.'.format(
args.local_rank, cfg.NUM_GPU))
# fix random seeds
torch.manual_seed(cfg.SEED)
torch.cuda.manual_seed_all(cfg.SEED)
np.random.seed(cfg.SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# get parameters and FLOPs of model
if args.local_rank == 0:
macs, params = get_model_flops_params(model, input_size=(
1, 3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE))
logger.info(
'[Model-{}] Flops: {} Params: {}'.format(cfg.NET.SELECTION, macs, params))
# create optimizer
optimizer = create_optimizer(cfg, model)
model = model.cuda()
# optionally resume from a checkpoint
resume_state, resume_epoch = {}, None
if cfg.AUTO_RESUME:
resume_state, resume_epoch = resume_checkpoint(model, cfg.RESUME_PATH)
optimizer.load_state_dict(resume_state['optimizer'])
del resume_state
model_ema = None
if cfg.NET.EMA.USE:
model_ema = ModelEma(
model,
decay=cfg.NET.EMA.DECAY,
device='cpu' if cfg.NET.EMA.FORCE_CPU else '',
resume=cfg.RESUME_PATH if cfg.AUTO_RESUME else None)
if distributed:
if cfg.BATCHNORM.SYNC_BN:
try:
if HAS_APEX:
model = convert_syncbn_model(model)
else:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
model)
if args.local_rank == 0:
logger.info(
'Converted model to use Synchronized BatchNorm.')
except Exception as e:
if args.local_rank == 0:
logger.error(
'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1 with exception {}'.format(e))
if HAS_APEX:
model = DDP(model, delay_allreduce=True)
else:
if args.local_rank == 0:
logger.info(
"Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
# can use device str in Torch >= 1.1
model = DDP(model, device_ids=[args.local_rank])
# imagenet train dataset
train_dir = os.path.join(cfg.DATA_DIR, 'train')
if not os.path.exists(train_dir) and args.local_rank == 0:
logger.error('Training folder does not exist at: {}'.format(train_dir))
exit(1)
dataset_train = Dataset(train_dir)
loader_train = create_loader(
dataset_train,
input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
batch_size=cfg.DATASET.BATCH_SIZE,
is_training=True,
color_jitter=cfg.AUGMENTATION.COLOR_JITTER,
auto_augment=cfg.AUGMENTATION.AA,
num_aug_splits=0,
crop_pct=DEFAULT_CROP_PCT,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD,
num_workers=cfg.WORKERS,
distributed=distributed,
collate_fn=None,
pin_memory=cfg.DATASET.PIN_MEM,
interpolation='random',
re_mode=cfg.AUGMENTATION.RE_MODE,
re_prob=cfg.AUGMENTATION.RE_PROB
)
# imagenet validation dataset
eval_dir = os.path.join(cfg.DATA_DIR, 'val')
if not os.path.exists(eval_dir) and args.local_rank == 0:
logger.error(
'Validation folder does not exist at: {}'.format(eval_dir))
exit(1)
dataset_eval = Dataset(eval_dir)
loader_eval = create_loader(
dataset_eval,
input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
batch_size=cfg.DATASET.VAL_BATCH_MUL * cfg.DATASET.BATCH_SIZE,
is_training=False,
interpolation=cfg.DATASET.INTERPOLATION,
crop_pct=DEFAULT_CROP_PCT,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD,
num_workers=cfg.WORKERS,
distributed=distributed,
pin_memory=cfg.DATASET.PIN_MEM
)
# whether to use label smoothing
if cfg.AUGMENTATION.SMOOTHING > 0.:
train_loss_fn = LabelSmoothingCrossEntropy(
smoothing=cfg.AUGMENTATION.SMOOTHING).cuda()
validate_loss_fn = nn.CrossEntropyLoss().cuda()
else:
train_loss_fn = nn.CrossEntropyLoss().cuda()
validate_loss_fn = train_loss_fn
# create learning rate scheduler
lr_scheduler, num_epochs = create_scheduler(cfg, optimizer)
start_epoch = resume_epoch if resume_epoch is not None else 0
if start_epoch > 0:
lr_scheduler.step(start_epoch)
if args.local_rank == 0:
logger.info('Scheduled epochs: {}'.format(num_epochs))
try:
best_record, best_ep = 0, 0
for epoch in range(start_epoch, num_epochs):
if distributed:
loader_train.sampler.set_epoch(epoch)
train_metrics = train_epoch(
epoch,
model,
loader_train,
optimizer,
train_loss_fn,
cfg,
lr_scheduler=lr_scheduler,
saver=saver,
output_dir=output_dir,
model_ema=model_ema,
logger=logger,
writer=writer,
local_rank=args.local_rank)
eval_metrics = validate(
epoch,
model,
loader_eval,
validate_loss_fn,
cfg,
logger=logger,
writer=writer,
local_rank=args.local_rank)
if model_ema is not None and not cfg.NET.EMA.FORCE_CPU:
ema_eval_metrics = validate(
epoch,
model_ema.ema,
loader_eval,
validate_loss_fn,
cfg,
log_suffix='_EMA',
logger=logger,
writer=writer)
eval_metrics = ema_eval_metrics
if lr_scheduler is not None:
lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])
update_summary(epoch, train_metrics, eval_metrics, os.path.join(
output_dir, 'summary.csv'), write_header=best_metric is None)
if saver is not None:
# save proper checkpoint with eval metric
save_metric = eval_metrics[eval_metric]
best_metric, best_epoch = saver.save_checkpoint(
model, optimizer, cfg,
epoch=epoch, model_ema=model_ema, metric=save_metric)
if best_record < eval_metrics[eval_metric]:
best_record = eval_metrics[eval_metric]
best_ep = epoch
if args.local_rank == 0:
logger.info(
'*** Best metric: {0} (epoch {1})'.format(best_record, best_ep))
except KeyboardInterrupt:
pass
if best_metric is not None:
logger.info(
'*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
if __name__ == '__main__':
main()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import os
import warnings
import datetime
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
# import timm packages
from timm.utils import ModelEma
from timm.models import resume_checkpoint
from timm.data import Dataset, create_loader
# import apex as distributed package
try:
from apex.parallel import convert_syncbn_model
from apex.parallel import DistributedDataParallel as DDP
HAS_APEX = True
except ImportError:
from torch.nn.parallel import DistributedDataParallel as DDP
HAS_APEX = False
# import models and training functions
from lib.core.test import validate
from lib.models.structures.childnet import gen_childnet
from lib.utils.util import parse_config_args, get_logger, get_model_flops_params
from lib.config import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
def main():
args, cfg = parse_config_args('child net testing')
# resolve logging
output_dir = os.path.join(cfg.SAVE_PATH,
"{}-{}".format(datetime.date.today().strftime('%m%d'),
cfg.MODEL))
if not os.path.exists(output_dir):
os.mkdir(output_dir)
if args.local_rank == 0:
logger = get_logger(os.path.join(output_dir, 'test.log'))
writer = SummaryWriter(os.path.join(output_dir, 'runs'))
else:
writer, logger = None, None
# retrain model selection
if cfg.NET.SELECTION == 481:
arch_list = [
[0], [
3, 4, 3, 1], [
3, 2, 3, 0], [
3, 3, 3, 1], [
3, 3, 3, 3], [
3, 3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
elif cfg.NET.SELECTION == 43:
arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 96
elif cfg.NET.SELECTION == 14:
arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
cfg.DATASET.IMAGE_SIZE = 64
elif cfg.NET.SELECTION == 112:
arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 160
elif cfg.NET.SELECTION == 287:
arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
elif cfg.NET.SELECTION == 604:
arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
[3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
cfg.DATASET.IMAGE_SIZE = 224
else:
raise ValueError("Model Test Selection is not Supported!")
# define childnet architecture from arch_list
stem = ['ds_r1_k3_s1_e1_c16_se0.25', 'cn_r1_k1_s1_c320_se0.25']
choice_block_pool = ['ir_r1_k3_s2_e4_c24_se0.25',
'ir_r1_k5_s2_e4_c40_se0.25',
'ir_r1_k3_s2_e6_c80_se0.25',
'ir_r1_k3_s1_e6_c96_se0.25',
'ir_r1_k3_s2_e6_c192_se0.25']
arch_def = [[stem[0]]] + [[choice_block_pool[idx]
for repeat_times in range(len(arch_list[idx + 1]))]
for idx in range(len(choice_block_pool))] + [[stem[1]]]
# generate childnet
model = gen_childnet(
arch_list,
arch_def,
num_classes=cfg.DATASET.NUM_CLASSES,
drop_rate=cfg.NET.DROPOUT_RATE,
global_pool=cfg.NET.GP)
if args.local_rank == 0:
macs, params = get_model_flops_params(model, input_size=(
1, 3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE))
logger.info(
'[Model-{}] Flops: {} Params: {}'.format(cfg.NET.SELECTION, macs, params))
# initialize distributed parameters
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')
if args.local_rank == 0:
logger.info(
"Training on Process {} with {} GPUs.".format(
args.local_rank, cfg.NUM_GPU))
# resume model from checkpoint
assert cfg.AUTO_RESUME is True and os.path.exists(cfg.RESUME_PATH)
_, __ = resume_checkpoint(model, cfg.RESUME_PATH)
model = model.cuda()
model_ema = None
if cfg.NET.EMA.USE:
# Important to create EMA model after cuda(), DP wrapper, and AMP but
# before SyncBN and DDP wrapper
model_ema = ModelEma(
model,
decay=cfg.NET.EMA.DECAY,
device='cpu' if cfg.NET.EMA.FORCE_CPU else '',
resume=cfg.RESUME_PATH)
# imagenet validation dataset
eval_dir = os.path.join(cfg.DATA_DIR, 'val')
if not os.path.exists(eval_dir) and args.local_rank == 0:
logger.error(
'Validation folder does not exist at: {}'.format(eval_dir))
exit(1)
dataset_eval = Dataset(eval_dir)
loader_eval = create_loader(
dataset_eval,
input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
batch_size=cfg.DATASET.VAL_BATCH_MUL * cfg.DATASET.BATCH_SIZE,
is_training=False,
num_workers=cfg.WORKERS,
distributed=True,
pin_memory=cfg.DATASET.PIN_MEM,
crop_pct=DEFAULT_CROP_PCT,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD
)
# only test accuracy of model-EMA
validate_loss_fn = nn.CrossEntropyLoss().cuda()
validate(0, model_ema.ema, loader_eval, validate_loss_fn, cfg,
log_suffix='_EMA', logger=logger,
writer=writer, local_rank=args.local_rank)
if __name__ == '__main__':
main()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com
import os
import sys
import datetime
import torch
import numpy as np
import torch.nn as nn
# import timm packages
from timm.loss import LabelSmoothingCrossEntropy
from timm.data import Dataset, create_loader
from timm.models import resume_checkpoint
# import apex as distributed package
try:
from apex.parallel import DistributedDataParallel as DDP
from apex.parallel import convert_syncbn_model
USE_APEX = True
except ImportError:
from torch.nn.parallel import DistributedDataParallel as DDP
USE_APEX = False
# import models and training functions
from lib.utils.flops_table import FlopsEst
from lib.models.structures.supernet import gen_supernet
from lib.config import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_MEAN
from lib.utils.util import parse_config_args, get_logger, \
create_optimizer_supernet, create_supernet_scheduler
from nni.nas.pytorch.callbacks import LRSchedulerCallback
from nni.nas.pytorch.callbacks import ModelCheckpoint
from nni.algorithms.nas.pytorch.cream import CreamSupernetTrainer
from nni.algorithms.nas.pytorch.random import RandomMutator
def main():
args, cfg = parse_config_args('nni.cream.supernet')
# resolve logging
output_dir = os.path.join(cfg.SAVE_PATH,
"{}-{}".format(datetime.date.today().strftime('%m%d'),
cfg.MODEL))
if not os.path.exists(output_dir):
os.mkdir(output_dir)
if args.local_rank == 0:
logger = get_logger(os.path.join(output_dir, "train.log"))
else:
logger = None
# initialize distributed parameters
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')
if args.local_rank == 0:
logger.info(
'Training on Process %d with %d GPUs.',
args.local_rank, cfg.NUM_GPU)
# fix random seeds
torch.manual_seed(cfg.SEED)
torch.cuda.manual_seed_all(cfg.SEED)
np.random.seed(cfg.SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# generate supernet
model, sta_num, resolution = gen_supernet(
flops_minimum=cfg.SUPERNET.FLOPS_MINIMUM,
flops_maximum=cfg.SUPERNET.FLOPS_MAXIMUM,
num_classes=cfg.DATASET.NUM_CLASSES,
drop_rate=cfg.NET.DROPOUT_RATE,
global_pool=cfg.NET.GP,
resunit=cfg.SUPERNET.RESUNIT,
dil_conv=cfg.SUPERNET.DIL_CONV,
slice=cfg.SUPERNET.SLICE,
verbose=cfg.VERBOSE,
logger=logger)
# number of candidate operations per choice block in the supernet
choice_num = len(model.blocks[7])
if args.local_rank == 0:
logger.info('Supernet created, param count: %d', (
sum([m.numel() for m in model.parameters()])))
logger.info('resolution: %d', (resolution))
logger.info('choice number: %d', (choice_num))
# initialize flops look-up table
model_est = FlopsEst(model)
flops_dict, flops_fixed = model_est.flops_dict, model_est.flops_fixed
# optionally resume from a checkpoint
optimizer_state = None
resume_epoch = None
if cfg.AUTO_RESUME:
optimizer_state, resume_epoch = resume_checkpoint(
model, cfg.RESUME_PATH)
# create optimizer and resume from checkpoint
optimizer = create_optimizer_supernet(cfg, model, USE_APEX)
if optimizer_state is not None:
optimizer.load_state_dict(optimizer_state['optimizer'])
model = model.cuda()
# convert model to distributed mode
if cfg.BATCHNORM.SYNC_BN:
try:
if USE_APEX:
model = convert_syncbn_model(model)
else:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
if args.local_rank == 0:
logger.info('Converted model to use Synchronized BatchNorm.')
except Exception as exception:
logger.info(
'Failed to enable Synchronized BatchNorm. '
'Install Apex or Torch >= 1.1 with Exception %s', exception)
if USE_APEX:
model = DDP(model, delay_allreduce=True)
else:
if args.local_rank == 0:
logger.info(
"Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
# can use device str in Torch >= 1.1
model = DDP(model, device_ids=[args.local_rank])
# create learning rate scheduler
lr_scheduler, num_epochs = create_supernet_scheduler(cfg, optimizer)
start_epoch = resume_epoch if resume_epoch is not None else 0
if start_epoch > 0:
lr_scheduler.step(start_epoch)
if args.local_rank == 0:
logger.info('Scheduled epochs: %d', num_epochs)
# imagenet train dataset
train_dir = os.path.join(cfg.DATA_DIR, 'train')
if not os.path.exists(train_dir):
logger.info('Training folder does not exist at: %s', train_dir)
sys.exit()
dataset_train = Dataset(train_dir)
loader_train = create_loader(
dataset_train,
input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
batch_size=cfg.DATASET.BATCH_SIZE,
is_training=True,
use_prefetcher=True,
re_prob=cfg.AUGMENTATION.RE_PROB,
re_mode=cfg.AUGMENTATION.RE_MODE,
color_jitter=cfg.AUGMENTATION.COLOR_JITTER,
interpolation='random',
num_workers=cfg.WORKERS,
distributed=True,
collate_fn=None,
crop_pct=DEFAULT_CROP_PCT,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD
)
# imagenet validation dataset
eval_dir = os.path.join(cfg.DATA_DIR, 'val')
if not os.path.isdir(eval_dir):
logger.info('Validation folder does not exist at: %s', eval_dir)
sys.exit()
dataset_eval = Dataset(eval_dir)
loader_eval = create_loader(
dataset_eval,
input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
batch_size=4 * cfg.DATASET.BATCH_SIZE,
is_training=False,
use_prefetcher=True,
num_workers=cfg.WORKERS,
distributed=True,
crop_pct=DEFAULT_CROP_PCT,
mean=IMAGENET_DEFAULT_MEAN,
std=IMAGENET_DEFAULT_STD,
interpolation=cfg.DATASET.INTERPOLATION
)
# whether to use label smoothing
if cfg.AUGMENTATION.SMOOTHING > 0.:
train_loss_fn = LabelSmoothingCrossEntropy(
smoothing=cfg.AUGMENTATION.SMOOTHING).cuda()
validate_loss_fn = nn.CrossEntropyLoss().cuda()
else:
train_loss_fn = nn.CrossEntropyLoss().cuda()
validate_loss_fn = train_loss_fn
mutator = RandomMutator(model)
trainer = CreamSupernetTrainer(model, train_loss_fn, validate_loss_fn,
optimizer, num_epochs, loader_train, loader_eval,
mutator=mutator, batch_size=cfg.DATASET.BATCH_SIZE,
log_frequency=cfg.LOG_INTERVAL,
meta_sta_epoch=cfg.SUPERNET.META_STA_EPOCH,
update_iter=cfg.SUPERNET.UPDATE_ITER,
slices=cfg.SUPERNET.SLICE,
pool_size=cfg.SUPERNET.POOL_SIZE,
pick_method=cfg.SUPERNET.PICK_METHOD,
choice_num=choice_num, sta_num=sta_num, acc_gap=cfg.ACC_GAP,
flops_dict=flops_dict, flops_fixed=flops_fixed, local_rank=args.local_rank,
callbacks=[LRSchedulerCallback(lr_scheduler),
ModelCheckpoint(output_dir)])
trainer.train()
if __name__ == '__main__':
main()
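# Launch note (assumption, not from the original file): the script parses --local_rank
# and calls init_process_group(init_method='env://'), which matches torch.distributed.launch,
# e.g.
#   python -m torch.distributed.launch --nproc_per_node=8 <this_script>.py --cfg <supernet_yaml>
# where <supernet_yaml> is whatever configuration file defines cfg for this run.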
@@ -11,7 +11,7 @@ import torch.nn as nn
 import datasets
 from model import CNN
 from nni.nas.pytorch.callbacks import ArchitectureCheckpoint, LRSchedulerCallback
-from nni.nas.pytorch.darts import DartsTrainer
+from nni.algorithms.nas.pytorch.darts import DartsTrainer
 from utils import accuracy
 logger = logging.getLogger('nni')

@@ -11,7 +11,7 @@ import torch.nn as nn
 import datasets
 from macro import GeneralNetwork
 from micro import MicroNetwork
-from nni.nas.pytorch import enas
+from nni.algorithms.nas.pytorch import enas
 from nni.nas.pytorch.callbacks import (ArchitectureCheckpoint,
                                        LRSchedulerCallback)
 from utils import accuracy, reward_accuracy

@@ -10,7 +10,7 @@ import torch
 import torch.nn as nn
 from nni.nas.pytorch.callbacks import ArchitectureCheckpoint
-from nni.nas.pytorch.pdarts import PdartsTrainer
+from nni.algorithms.nas.pytorch.pdarts import PdartsTrainer
 # prevent it to be reordered.
 if True:

@@ -7,7 +7,7 @@ import datasets
 from putils import get_parameters
 from model import SearchMobileNet
-from nni.nas.pytorch.proxylessnas import ProxylessNasTrainer
+from nni.algorithms.nas.pytorch.proxylessnas import ProxylessNasTrainer
 from retrain import Retrain
 logger = logging.getLogger('nni_proxylessnas')

@@ -10,7 +10,7 @@ import torch
 import torch.nn as nn
 from nni.nas.pytorch.callbacks import LRSchedulerCallback
 from nni.nas.pytorch.callbacks import ModelCheckpoint
-from nni.nas.pytorch.spos import SPOSSupernetTrainingMutator, SPOSSupernetTrainer
+from nni.algorithms.nas.pytorch.spos import SPOSSupernetTrainingMutator, SPOSSupernetTrainer
 from dataloader import get_imagenet_iter_dali
 from network import ShuffleNetV2OneShot, load_and_parse_state_dict

@@ -11,7 +11,7 @@ import nni
 import numpy as np
 import torch
 import torch.nn as nn
-from nni.nas.pytorch.classic_nas import get_and_apply_next_architecture
+from nni.algorithms.nas.pytorch.classic_nas import get_and_apply_next_architecture
 from nni.nas.pytorch.utils import AverageMeterGroup
 from dataloader import get_imagenet_iter_dali

@@ -11,7 +11,7 @@ import numpy as np
 import torch
 import torch.nn as nn
-from nni.nas.pytorch.enas import EnasMutator, EnasTrainer
+from nni.algorithms.nas.pytorch.enas import EnasMutator, EnasTrainer
 from nni.nas.pytorch.callbacks import LRSchedulerCallback
 from dataloader import read_data_sst
authorName: default
experimentName: example_mnist_hyperband
trialConcurrency: 2
maxExecDuration: 100h
maxTrialNum: 10000
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
advisor:
#choice: Hyperband, BOHB
builtinAdvisorName: Hyperband
classArgs:
#R: the maximum trial budget (e.g. the number of mini-batches or epochs) that can be
#   allocated to a trial. Each trial should use its trial budget to control how long it runs.
R: 60
#eta: proportion of discarded trials
eta: 3
#choice: maximize, minimize
optimize_mode: maximize
#choice: serial, parallelism
exec_mode: serial
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
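# Worked example (illustrative, standard Hyperband arithmetic; the advisor's exact
# rounding may differ): with R=60 and eta=3, s_max = floor(log3(60)) = 3, so four
# brackets are run. Bracket s=3 starts 27 trials at budget ~60/27 ≈ 2.2, s=2 starts 12
# at ~6.7, s=1 starts 6 at 20, and s=0 starts 4 at the full budget R=60; each round
# keeps roughly the top 1/eta of trials.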
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
A test for Hyperband using NAS-Bench-201, so the NAS-Bench-201 dependencies need to be installed first.
"""
import argparse
import logging
import random
import time
import nni
from nni.utils import merge_parameter
from nni.nas.benchmarks.nasbench201 import query_nb201_trial_stats
logger = logging.getLogger('test_hyperband')
def main(args):
r = args.pop('TRIAL_BUDGET')
dataset = [t for t in query_nb201_trial_stats(args, 200, 'cifar100', include_intermediates=True)]
test_acc = random.choice(dataset)['intermediates'][r - 1]['ori_test_acc'] / 100
time.sleep(random.randint(0, 10))
nni.report_final_result(test_acc)
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')
def get_params():
parser = argparse.ArgumentParser(description='Hyperband Test')
parser.add_argument("--0_1", type=str, default='none')
parser.add_argument("--0_2", type=str, default='none')
parser.add_argument("--0_3", type=str, default='none')
parser.add_argument("--1_2", type=str, default='none')
parser.add_argument("--1_3", type=str, default='none')
parser.add_argument("--2_3", type=str, default='none')
parser.add_argument("--TRIAL_BUDGET", type=int, default=200)
args, _ = parser.parse_known_args()
return args
if __name__ == '__main__':
try:
# get parameters from the tuner
tuner_params = nni.get_next_parameter()
logger.debug(tuner_params)
params = vars(merge_parameter(get_params(), tuner_params))
print(params)
main(params)
except Exception as exception:
logger.exception(exception)
raise
{
"0_1": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]},
"0_2": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]},
"0_3": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]},
"1_2": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]},
"1_3": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]},
"2_3": {"_type": "choice", "_value": ["none", "skip_connect", "conv_1x1", "conv_3x3", "avg_pool_3x3"]}
}