Unverified commit f2f58dbb, authored by Zhenhua Han, committed by GitHub

[Retiarii] cross-graph optimization: device placement and input deduplication (#3202)

parent 6645bd33
import json
import os
import sys
import torch
from pathlib import Path
import nni.retiarii.evaluator.pytorch.cgo.evaluator as cgo
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.strategy as strategy
from nni.retiarii import serialize
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from torchvision import transforms
from torchvision.datasets import CIFAR10
from darts_model import CNN
if __name__ == '__main__':
base_model = CNN(32, 3, 16, 10, 8)
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
train_dataset = serialize(CIFAR10, root='data/cifar10', train=True, download=True, transform=train_transform)
test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)
trainer = cgo.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
max_epochs=1, limit_train_batches=0.2)
simple_strategy = strategy.Random()
exp = RetiariiExperiment(base_model, trainer, [], simple_strategy)
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'darts_search'
exp_config.execution_engine = 'cgo'
exp_config.trial_concurrency = 3
# Since CGO may merge multiple trials into one, RetiariiExperiment may train more models than max_trial_number.
# For example, with max_trial_number = 3 it would train 9 models if each merged trial contains 3 models from the strategy.
exp_config.max_trial_number = 100
exp_config.devices = ['cuda:0', 'cuda:1', 'cuda:2']
exp_config.trial_gpu_number = 1
exp_config.batch_waiting_time = 100
exp_config.training_service.use_active_gpu = True
exp_config.training_service.gpu_indices = [0, 1, 2]
exp.run(exp_config, 8081)
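# A back-of-the-envelope sketch of the comment above (hypothetical helper,
# not part of NNI): max_trial_number counts merged physical trials, so the
# number of models actually trained can be larger.
def _models_trained(max_trial_number, models_per_merged_trial):
    # each counted trial is one merged physical trial holding several models
    return max_trial_number * models_per_merged_trial

assert _models_trained(3, 3) == 9  # the example from the comment above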
from nni.retiarii import basic_unit
import nni.retiarii.nn.pytorch as nn
import warnings
import torch
import torch.nn as torch_nn
from torchvision.models.utils import load_state_dict_from_url
import torch.nn.functional as F
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
# The paper suggests 0.9997 momentum, following the TensorFlow convention.
# The equivalent PyTorch momentum is 1 - TensorFlow momentum.
_BN_MOMENTUM = 1 - 0.9997
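# Concretely, the running-statistics updates are:
#   PyTorch:    running = (1 - momentum) * running + momentum * batch_stat
#   TensorFlow: running = decay * running + (1 - decay) * batch_stat
# so momentum_pytorch = 1 - decay_tensorflow = 1 - 0.9997 = 0.0003.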
_FIRST_DEPTH = 32
_MOBILENET_V2_FILTERS = [16, 24, 32, 64, 96, 160, 320]
_MOBILENET_V2_NUM_LAYERS = [1, 2, 3, 4, 3, 3, 1]
class _ResidualBlock(nn.Module):
def __init__(self, net):
super().__init__()
self.net = net
def forward(self, x):
return self.net(x) + x
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
super(_InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel_size in [3, 5]
mid_ch = in_ch * expansion_factor
self.apply_residual = skip and in_ch == out_ch and stride == 1
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum))
def forward(self, input):
if self.apply_residual:
ret = self.layers(input) + input
else:
ret = self.layers(input)
return ret
def _stack_inverted_residual(in_ch, out_ch, kernel_size, skip, stride, exp_factor, repeats, bn_momentum):
""" Creates a stack of inverted residuals. """
assert repeats >= 1
# First one has no skip, because feature map size changes.
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, skip, bn_momentum=bn_momentum)
remaining = []
for _ in range(1, repeats):
remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, skip, bn_momentum=bn_momentum))
return nn.Sequential(first, *remaining)
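# For example (a hedged reading of the code above),
# _stack_inverted_residual(16, 24, 3, True, 2, 3, 4, _BN_MOMENTUM) builds four
# blocks: the first maps 16 -> 24 channels with stride 2 and no residual
# (in_ch != out_ch), followed by three stride-1 24 -> 24 residual blocks.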
def _stack_normal_conv(in_ch, out_ch, kernel_size, skip, dconv, stride, repeats, bn_momentum):
assert repeats >= 1
stack = []
for i in range(repeats):
s = stride if i == 0 else 1
if dconv:
modules = [
nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=s, groups=in_ch, bias=False),
nn.BatchNorm2d(in_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
else:
modules = [
nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=s, bias=False),
nn.ReLU(inplace=True),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
if skip and in_ch == out_ch and s == 1:
# use different implementations for the skip and no-skip cases to align with PyTorch
stack.append(_ResidualBlock(nn.Sequential(*modules)))
else:
stack += modules
in_ch = out_ch
return stack
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
""" Asymmetric rounding to make `val` divisible by `divisor`. With default
bias, will round up, unless the number is no more than 10% greater than the
smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
assert 0.0 < round_up_bias < 1.0
new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
return new_val if new_val >= round_up_bias * val else new_val + divisor
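# Spot checks matching the docstring above:
assert _round_to_multiple_of(83, 8) == 80
assert _round_to_multiple_of(84, 8) == 88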
def _get_depths(depths, alpha):
""" Scales tensor depths as in reference MobileNet code, prefers rouding up
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
>>> model = MNASNet(1000, 1.0)
>>> x = torch.rand(1, 3, 224, 224)
>>> y = model(x)
>>> y.dim()
1
>>> y.nelement()
1000
"""
# Version 2 adds depth scaling in the initial stages of the network.
_version = 2
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
skips, num_classes=1000, dropout=0.2):
super().__init__()
assert alpha > 0.0
assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
self.alpha = alpha
self.num_classes = num_classes
depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
exp_ratios = [3, 3, 3, 6, 6, 6, 6]
strides = [1, 2, 2, 2, 1, 2, 1]
layers = [
# First layer: regular conv.
nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
count = 0
# for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
# TODO: restrict `choice` so that it can only be used within a mutator
ph = nn.Placeholder(label=f'mutable_{count}', **{
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
# 'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
})
layers.append(ph)
'''if conv == "mconv":
# MNASNet blocks: stacks of inverted residuals.
layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
stride, exp_ratio, repeat, _BN_MOMENTUM))
else:
# Normal conv and depth-separated conv
layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
stride, repeat, _BN_MOMENTUM)'''
count += 1
if count >= 2:
break
layers += [
# Final mapping to classifier input.
nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
nn.Linear(1280, num_classes))
self._initialize_weights()
#self.for_test = 10
def forward(self, x):
# if self.for_test == 10:
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
x = F.relu(x)
return self.classifier(x)
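# Note: x.mean([2, 3]) above is equivalent to global average pooling,
# i.e. F.adaptive_avg_pool2d(x, 1).flatten(1).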
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
torch_nn.init.kaiming_normal_(m.weight, mode="fan_out",
nonlinearity="relu")
if m.bias is not None:
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
torch_nn.init.ones_(m.weight)
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out",
nonlinearity="sigmoid")
torch_nn.init.zeros_(m.bias)
def test_model(model):
model(torch.randn(2, 3, 224, 224))
# ====================definition of candidate op classes
BN_MOMENTUM = 1 - 0.9997
class RegularConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=stride, bias=False)
self.relu = nn.ReLU(inplace=True)
self.bn = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)
def forward(self, x):
out = self.bn(self.relu(self.conv(x)))
if self.skip == 'identity':
out = out + x
return out
class DepthwiseConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
self.conv1 = nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=stride, groups=in_ch, bias=False)
self.bn1 = nn.BatchNorm2d(in_ch, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)
def forward(self, x):
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
if self.skip == 'identity':
out = out + x
return out
class MobileConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
mid_ch = in_ch * exp_ratio
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=(kernel_size - 1) // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM))
def forward(self, x):
out = self.layers(x)
if self.skip == 'identity':
out = out + x
return out
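# The three candidate ops deliberately share one constructor signature so the
# mutator can swap them by op-type name. A small shape smoke test (a hedged
# sketch, not part of the original example; invoke it manually):
def _smoke_test_candidate_ops():
    x = torch.randn(2, 16, 32, 32)
    for op_cls in (RegularConv, DepthwiseConv, MobileConv):
        op = op_cls(kernel_size=3, in_ch=16, out_ch=24, skip='no', exp_ratio=3, stride=2)
        # each op maps (N, in_ch, H, W) -> (N, out_ch, H / stride, W / stride)
        assert op(x).shape == (2, 24, 16, 16)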
# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
import logging
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
from nni.retiarii import Mutator
from base_mnasnet import RegularConv, DepthwiseConv, MobileConv
_logger = logging.getLogger(__name__)
class BlockMutator(Mutator):
def __init__(self, target: str):
super(BlockMutator, self).__init__()
self.target = target
def mutate(self, model):
nodes = model.get_nodes_by_label(self.target)
assert len(nodes) == 1
node = nodes[0]
graph = node.graph
related_info = node.operation.parameters
kernel_size = self.choice(related_info['kernel_size_options'])
op_type = self.choice(related_info['op_type_options'])
#self.choice(related_info['se_ratio_options'])
skip = self.choice(related_info['skip_options'])
n_filter = self.choice(related_info['n_filter_options'])
if related_info['in_ch'] is not None:
in_ch = related_info['in_ch']
else:
assert len(node.predecessors) == 1
the_node = node.predecessors[0]
_logger.debug(repr(the_node.operation.parameters))
_logger.debug(repr(the_node))
in_ch = the_node.operation.parameters['out_ch']
# update the placeholder to be a new operation
node.update_operation(op_type, {
'kernel_size': kernel_size,
'in_ch': in_ch,
'out_ch': n_filter,
'skip': 'no',
'exp_ratio': related_info['exp_ratio'],
'stride': related_info['stride']
})
# insert new nodes after the placeholder
n_layer = self.choice(related_info['n_layer_options'])
for i in range(1, n_layer):
node = graph.insert_node_on_edge(node.outgoing_edges[0],
'{}_{}'.format(self.target, i),
op_type,
{'kernel_size': kernel_size,
'in_ch': n_filter,
'out_ch': n_filter,
'skip': skip,
'exp_ratio': related_info['exp_ratio'],
'stride': 1})
# fix possible shape mismatch
# TODO: use formal method function to update parameters
if len(node.successors) == 1 and 'in_channels' in node.successors[0].operation.parameters:
node.successors[0].operation.parameters['in_channels'] = n_filter
\ No newline at end of file
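# Usage sketch (hedged; it mirrors the search script below): one BlockMutator
# is registered per placeholder label, and each mutate() call rewrites that
# placeholder into a concrete candidate op, inserting n_layer - 1 stacked
# copies after it:
#   applied_mutators = [BlockMutator('mutable_0'), BlockMutator('mutable_1')]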
import os
import sys
import torch
from pathlib import Path
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.evaluator.pytorch.cgo.evaluator as cgo
from nni.retiarii import serialize
from base_mnasnet import MNASNet
from nni.experiment import RemoteMachineConfig
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategy import TPEStrategy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from mutator import BlockMutator
if __name__ == '__main__':
_DEFAULT_DEPTHS = [16, 24, 40, 80, 96, 192, 320]
_DEFAULT_CONVOPS = ["dconv", "mconv", "mconv", "mconv", "mconv", "mconv", "mconv"]
_DEFAULT_SKIPS = [False, True, True, True, True, True, True]
_DEFAULT_KERNEL_SIZES = [3, 3, 5, 5, 3, 5, 3]
_DEFAULT_NUM_LAYERS = [1, 3, 3, 3, 2, 4, 1]
base_model = MNASNet(0.5, _DEFAULT_DEPTHS, _DEFAULT_CONVOPS, _DEFAULT_KERNEL_SIZES,
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
train_dataset = serialize(CIFAR10, root='data/cifar10', train=True, download=True, transform=train_transform)
test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)
# trainer = pl.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
# val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
# max_epochs=1, limit_train_batches=0.2)
trainer = cgo.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
max_epochs=1, limit_train_batches=0.2)
applied_mutators = [
BlockMutator('mutable_0'),
BlockMutator('mutable_1')
]
simple_strategy = TPEStrategy()
exp = RetiariiExperiment(base_model, trainer, applied_mutators, simple_strategy)
exp_config = RetiariiExeConfig('remote')
exp_config.experiment_name = 'darts_search'
exp_config.trial_concurrency = 3
exp_config.max_trial_number = 10
exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True
exp_config.training_service.reuse_mode = True
exp_config.training_service.gpu_indices = [0, 1, 2]
exp_config.max_concurrency_cgo = 1
exp_config.batch_waiting_time = 0
rm_conf = RemoteMachineConfig()
rm_conf.host = '127.0.0.1'
rm_conf.user = 'xxx'
rm_conf.password = 'xxx'
rm_conf.port = 22
rm_conf.python_path = '/home/xxx/py38/bin'
rm_conf.gpu_indices = [0, 1, 2]
rm_conf.use_active_gpu = True
rm_conf.max_trial_number_per_gpu = 3
exp_config.training_service.machine_list = [rm_conf]
exp_config.execution_engine = 'cgo'
exp.run(exp_config, 8099)
\ No newline at end of file
@@ -31,7 +31,8 @@ if __name__ == '__main__':
 test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)
 trainer = pl.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
-max_epochs=1, limit_train_batches=0.2)
+max_epochs=1, limit_train_batches=0.2,
+progress_bar_refresh_rate=0)
 simple_strategy = strategy.Random()
{
"_model__stem":{
"inputs":[
"_inputs__1"
],
"outputs":[
"pool2__1"
],
"nodes":{
"_model__stem__conv1":{
"operation":{
"type":"__torch__.torch.nn.modules.conv.Conv2d",
"parameters":{
"out_channels":32,
"in_channels":1,
"kernel_size":5
}
}
},
"_model__stem__pool1":{
"operation":{
"type":"__torch__.torch.nn.modules.pooling.MaxPool2d",
"parameters":{
"kernel_size":2
}
}
},
"_model__stem__conv2":{
"operation":{
"type":"__torch__.torch.nn.modules.conv.Conv2d",
"parameters":{
"out_channels":64,
"in_channels":32,
"kernel_size":5
}
}
},
"_model__stem__pool2":{
"operation":{
"type":"__torch__.torch.nn.modules.pooling.MaxPool2d",
"parameters":{
"kernel_size":2
}
}
}
},
"edges":[
{
"head":[
"_inputs",
0
],
"tail":[
"_model__stem__conv1",
0
]
},
{
"head":[
"_model__stem__conv1",
null
],
"tail":[
"_model__stem__pool1",
0
]
},
{
"head":[
"_model__stem__pool1",
null
],
"tail":[
"_model__stem__conv2",
0
]
},
{
"head":[
"_model__stem__conv2",
null
],
"tail":[
"_model__stem__pool2",
0
]
},
{
"head":[
"_model__stem__pool2",
null
],
"tail":[
"_outputs",
null
]
}
]
},
"_model":{
"inputs":[
"image__1"
],
"outputs":[
"softmax__1"
],
"nodes":{
"_model__Constant2":{
"operation":{
"type":"prim::Constant",
"parameters":{
}
}
},
"_model__Constant3":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":3
}
}
},
"_model__Constant4":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":-1
}
}
},
"_model__Constant5":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":0
}
}
},
"_model__stem":{
"operation":{
"type":"_cell",
"parameters":{
},
"cell_name":"_model__stem"
}
},
"_model__Size6":{
"operation":{
"type":"aten::size",
"parameters":{
}
}
},
"_model__ListConstruct7":{
"operation":{
"type":"prim::ListConstruct",
"parameters":{
}
}
},
"_model__View8":{
"operation":{
"type":"aten::view",
"parameters":{
}
}
},
"_model__fc1":{
"operation":{
"type":"__torch__.torch.nn.modules.linear.Linear",
"parameters":{
"in_features":1024,
"out_features":256
}
}
},
"_model__fc2":{
"operation":{
"type":"__torch__.torch.nn.modules.linear.Linear",
"parameters":{
"in_features":256,
"out_features":10
}
}
},
"_model__softmax9":{
"operation":{
"type":"Function.softmax",
"parameters":{
}
}
}
},
"edges":[
{
"head":[
"_inputs",
0
],
"tail":[
"_model__stem",
0
]
},
{
"head":[
"_model__stem",
null
],
"tail":[
"_model__Size6",
0
]
},
{
"head":[
"_model__Constant5",
null
],
"tail":[
"_model__Size6",
1
]
},
{
"head":[
"_model__Size6",
null
],
"tail":[
"_model__ListConstruct7",
0
]
},
{
"head":[
"_model__Constant4",
null
],
"tail":[
"_model__ListConstruct7",
1
]
},
{
"head":[
"_model__stem",
null
],
"tail":[
"_model__View8",
0
]
},
{
"head":[
"_model__ListConstruct7",
null
],
"tail":[
"_model__View8",
1
]
},
{
"head":[
"_model__View8",
null
],
"tail":[
"_model__fc1",
0
]
},
{
"head":[
"_model__fc1",
null
],
"tail":[
"_model__fc2",
0
]
},
{
"head":[
"_model__fc2",
null
],
"tail":[
"_model__softmax9",
0
]
},
{
"head":[
"_model__Constant4",
null
],
"tail":[
"_model__softmax9",
1
]
},
{
"head":[
"_model__Constant3",
null
],
"tail":[
"_model__softmax9",
2
]
},
{
"head":[
"_model__Constant2",
null
],
"tail":[
"_model__softmax9",
3
]
},
{
"head":[
"_model__softmax9",
null
],
"tail":[
"_outputs",
null
]
}
]
},
"_evaluator": {
"module": "nni.retiarii.trainer.PyTorchImageClassificationTrainer",
"kwargs": {
"dataset_cls": "MNIST",
"dataset_kwargs": {
"root": "data/mnist",
"download": true
},
"dataloader_kwargs": {
"batch_size": 32
},
"optimizer_cls" : "SGD",
"optimizer_kwargs": {
"lr": 1e-3
},
"trainer_kwargs": {
"max_epochs": 1
}
}
}
}
\ No newline at end of file
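# The JSON above is a serialized Retiarii graph IR: named cells with nodes and
# edges, where each edge endpoint is a [node, port] pair, plus an _evaluator
# spec. A minimal loading sketch, mirroring the test code below (file name
# assumed):
#   import json
#   from nni.retiarii import Model
#   with open('mnist_pytorch.json') as f:
#       model = Model._load(json.load(f))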
import json
import os
import sys
import threading
import unittest
import logging
import time
import torch
import torch.nn as nn
from pathlib import Path
import nni
try:
from nni.common.device import GPUDevice
from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii import Model
from nni.retiarii.graph import Node
from nni.retiarii import Model, submit_models
from nni.retiarii.integration import RetiariiAdvisor
from nni.retiarii.execution import set_execution_engine
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.utils import import_
from nni.retiarii import serialize
import nni.retiarii.evaluator.pytorch.lightning as pl
from nni.retiarii.evaluator.pytorch.cgo.evaluator import MultiModelSupervisedLearningModule, _MultiModelSupervisedLearningModule
import nni.retiarii.evaluator.pytorch.cgo.trainer as cgo_trainer
module_import_failed = False
except ImportError:
module_import_failed = True
import pytest
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import Dataset
from sklearn.datasets import load_diabetes
class _model_cpu(nn.Module):
def __init__(self):
super().__init__()
self.M_1_stem = M_1_stem()
self.M_2_stem = M_2_stem()
self.M_1_flatten = torch.nn.Flatten()
self.M_2_flatten = torch.nn.Flatten()
self.M_1_fc1 = torch.nn.Linear(out_features=256, in_features=1024)
self.M_2_fc1 = torch.nn.Linear(out_features=256, in_features=1024)
self.M_1_fc2 = torch.nn.Linear(out_features=10, in_features=256)
self.M_2_fc2 = torch.nn.Linear(out_features=10, in_features=256)
self.M_1_softmax = torch.nn.Softmax()
self.M_2_softmax = torch.nn.Softmax()
def forward(self, *_inputs):
M_1__inputs_to_M_2_stem = _inputs[0]
M_1_stem = self.M_1_stem(_inputs[0])
M_2_stem = self.M_2_stem(M_1__inputs_to_M_2_stem)
M_1_flatten = self.M_1_flatten(M_1_stem)
M_2_flatten = self.M_2_flatten(M_2_stem)
M_1_fc1 = self.M_1_fc1(M_1_flatten)
M_2_fc1 = self.M_2_fc1(M_2_flatten)
M_1_fc2 = self.M_1_fc2(M_1_fc1)
M_2_fc2 = self.M_2_fc2(M_2_fc1)
M_1_softmax = self.M_1_softmax(M_1_fc2)
M_2_softmax = self.M_2_softmax(M_2_fc2)
return M_1_softmax, M_2_softmax
class _model_gpu(nn.Module):
def __init__(self):
super().__init__()
self.M_1_stem = M_1_stem().to('cuda:0')
self.M_2_stem = M_2_stem().to('cuda:1')
self.M_1_flatten = torch.nn.Flatten().to('cuda:0')
self.M_2_flatten = torch.nn.Flatten().to('cuda:1')
self.M_1_fc1 = torch.nn.Linear(out_features=256, in_features=1024).to('cuda:0')
self.M_2_fc1 = torch.nn.Linear(out_features=256, in_features=1024).to('cuda:1')
self.M_1_fc2 = torch.nn.Linear(out_features=10, in_features=256).to('cuda:0')
self.M_2_fc2 = torch.nn.Linear(out_features=10, in_features=256).to('cuda:1')
self.M_1_softmax = torch.nn.Softmax().to('cuda:0')
self.M_2_softmax = torch.nn.Softmax().to('cuda:1')
def forward(self, *_inputs):
M_1__inputs_to_M_1_stem = _inputs[0].to("cuda:0")
M_1__inputs_to_M_2_stem = _inputs[0].to("cuda:1")
M_1_stem = self.M_1_stem(M_1__inputs_to_M_1_stem)
M_2_stem = self.M_2_stem(M_1__inputs_to_M_2_stem)
M_1_flatten = self.M_1_flatten(M_1_stem)
M_2_flatten = self.M_2_flatten(M_2_stem)
M_1_fc1 = self.M_1_fc1(M_1_flatten)
M_2_fc1 = self.M_2_fc1(M_2_flatten)
M_1_fc2 = self.M_1_fc2(M_1_fc1)
M_2_fc2 = self.M_2_fc2(M_2_fc1)
M_1_softmax = self.M_1_softmax(M_1_fc2)
M_2_softmax = self.M_2_softmax(M_2_fc2)
return M_1_softmax, M_2_softmax
class M_1_stem(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = torch.nn.Conv2d(out_channels=32, in_channels=1, kernel_size=5)
self.pool1 = torch.nn.MaxPool2d(kernel_size=2)
self.conv2 = torch.nn.Conv2d(out_channels=64, in_channels=32, kernel_size=5)
self.pool2 = torch.nn.MaxPool2d(kernel_size=2)
def forward(self, *_inputs):
conv1 = self.conv1(_inputs[0])
pool1 = self.pool1(conv1)
conv2 = self.conv2(pool1)
pool2 = self.pool2(conv2)
return pool2
class M_2_stem(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = torch.nn.Conv2d(out_channels=32, in_channels=1, kernel_size=5)
self.pool1 = torch.nn.MaxPool2d(kernel_size=2)
self.conv2 = torch.nn.Conv2d(out_channels=64, in_channels=32, kernel_size=5)
self.pool2 = torch.nn.MaxPool2d(kernel_size=2)
def forward(self, *_inputs):
conv1 = self.conv1(_inputs[0])
pool1 = self.pool1(conv1)
conv2 = self.conv2(pool1)
pool2 = self.pool2(conv2)
return pool2
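# _model_cpu and _model_gpu above illustrate what CGO codegen produces after
# cross-graph merging: the duplicated MNIST input is deduplicated into a single
# tensor, and _model_gpu additionally pins each logical model's modules to its
# own device. A CPU-only smoke test (a hedged sketch; MNIST's 1x28x28 input
# shape is assumed):
def _smoke_test_merged_cpu():
    out1, out2 = _model_cpu()(torch.randn(2, 1, 28, 28))
    assert out1.shape == out2.shape == (2, 10)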
def _reset():
# reset global state so these tests do not affect other SDK tests
nni.trial._intermediate_seq = 0
nni.trial._params = {'foo': 'bar', 'parameter_id': 0}
nni.runtime.platform.test._last_metric = None
nni.retiarii.integration_api._advisor = None
nni.retiarii.execution.api._execution_engine = None
def _new_trainer():
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)
multi_module = MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits})
lightning = pl.Lightning(multi_module, cgo_trainer.Trainer(use_cgo=True,
max_epochs=1,
limit_train_batches=0.25,
progress_bar_refresh_rate=0),
train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
return lightning
def _load_mnist(n_models: int = 1):
path = Path(__file__).parent / 'mnist_pytorch.json'
with open(path) as f:
mnist_model = Model._load(json.load(f))
mnist_model.evaluator = _new_trainer()
if n_models == 1:
return mnist_model
else:
models = [mnist_model]
for i in range(n_models - 1):
    forked_model = mnist_model.fork()
    forked_model.evaluator = _new_trainer()
    models.append(forked_model)
return models
@unittest.skip('Skipped in this version')
def _get_final_result():
result = json.loads(nni.runtime.platform.test._last_metric)['value']
if isinstance(result, list):
return [float(_) for _ in result]
else:
if isinstance(result, str) and '[' in result:
return json.loads(result)
return [float(result)]
class CGOEngineTest(unittest.TestCase):
def setUp(self):
if module_import_failed:
self.skipTest('test skip due to failed import of nni.retiarii.evaluator.pytorch.lightning')
def test_multi_model_trainer_cpu(self):
_reset()
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)
multi_module = _MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits}, n_models=2)
lightning = pl.Lightning(multi_module, cgo_trainer.Trainer(use_cgo=True,
max_epochs=1,
limit_train_batches=0.25),
train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
lightning._execute(_model_cpu)
result = _get_final_result()
assert len(result) == 2
for _ in result:
assert _ > 0.8
def test_multi_model_trainer_gpu(self):
_reset()
if not (torch.cuda.is_available() and torch.cuda.device_count() >= 2):
pytest.skip('test requires GPU and torch+cuda')
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)
multi_module = _MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits}, n_models=2)
lightning = pl.Lightning(multi_module, cgo_trainer.Trainer(use_cgo=True,
max_epochs=1,
limit_train_batches=0.25),
train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
lightning._execute(_model_gpu)
result = _get_final_result()
assert len(result) == 2
for _ in result:
assert _ > 0.8
def _build_logical_with_mnist(self, n_models: int):
lp = LogicalPlan()
models = _load_mnist(n_models=n_models)
for m in models:
lp.add_model(m)
return lp, models
def test_add_model(self):
_reset()
lp, models = self._build_logical_with_mnist(3)
for node in lp.logical_graph.hidden_nodes:
old_nodes = [m.root_graph.get_node_by_id(node.id) for m in models]
self.assertTrue(any([old_nodes[0].__repr__() == Node.__repr__(x) for x in old_nodes]))
def test_dedup_input_four_devices(self):
_reset()
lp, models = self._build_logical_with_mnist(3)
opt = DedupInputOptimizer()
opt.convert(lp)
advisor = RetiariiAdvisor()
available_devices = [GPUDevice("test", 0), GPUDevice("test", 1), GPUDevice("test", 2), GPUDevice("test", 3)]
cgo = CGOExecutionEngine(devices=available_devices, batch_waiting_time=0)
phy_models = cgo._assemble(lp)
self.assertTrue(len(phy_models) == 1)
advisor.stopping = True
advisor.default_worker.join()
advisor.assessor_worker.join()
cgo.join()
def test_dedup_input_two_devices(self):
_reset()
lp, models = self._build_logical_with_mnist(3)
opt = DedupInputOptimizer()
opt.convert(lp)
advisor = RetiariiAdvisor()
available_devices = [GPUDevice("test", 0), GPUDevice("test", 1)]
cgo = CGOExecutionEngine(devices=available_devices, batch_waiting_time=0)
phy_models = cgo._assemble(lp)
self.assertTrue(len(phy_models) == 2)
advisor.stopping = True
advisor.default_worker.join()
advisor.assessor_worker.join()
cgo.join()
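# The two tests above pin down CGO's packing rule after input deduplication:
# a merged physical model hosts at most one logical model per available device,
# so n_physical = ceil(n_logical / n_devices) -- ceil(3/4) == 1 and
# ceil(3/2) == 2 (a hedged reading of the assertions, not a documented API
# guarantee).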
def test_submit_models(self):
os.environ['CGO'] = 'true'
_reset()
nni.retiarii.debug_configs.framework = 'pytorch'
os.makedirs('generated', exist_ok=True)
from nni.runtime import protocol
import nni.runtime.platform.test as tt
protocol._out_file = open('generated/debug_protocol_out_file.py', 'wb')
protocol._in_file = open('generated/debug_protocol_out_file.py', 'rb')
models = _load_mnist(2)
advisor = RetiariiAdvisor()
cgo_engine = CGOExecutionEngine(devices=[GPUDevice("test", 0), GPUDevice("test", 1),
GPUDevice("test", 2), GPUDevice("test", 3)], batch_waiting_time=0)
set_execution_engine(cgo_engine)
submit_models(*models)
time.sleep(3)
if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
cmd, data = protocol.receive()
params = json.loads(data)
params['parameters']['training_kwargs']['max_steps'] = 100
tt.init_params(params)
trial_thread = threading.Thread(target=CGOExecutionEngine.trial_execute_graph)
trial_thread.start()
last_metric = None
while True:
@@ -66,15 +321,20 @@ class CGOEngineTest(unittest.TestCase):
metric = tt.get_last_metric()
if metric == last_metric:
continue
if 'value' in metric:
metric['value'] = json.dumps(metric['value'])
advisor.handle_report_metric_data(metric)
last_metric = metric
if not trial_thread.is_alive():
trial_thread.join()
break
trial_thread.join()
advisor.stopping = True
advisor.default_worker.join()
advisor.assessor_worker.join()
cgo_engine.join()
if __name__ == '__main__':
unittest.main()
import json
import os
import sys
import threading
import unittest
import logging
import time
from pathlib import Path
from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii import Model, Node, submit_models
from nni.retiarii.integration import RetiariiAdvisor
from nni.retiarii.utils import import_
def _load_mnist(n_models: int = 1):
path = Path(__file__).parent / 'converted_mnist_pytorch.json'
with open(path) as f:
mnist_model = Model._load(json.load(f))
if n_models == 1:
return mnist_model
else:
models = [mnist_model]
for i in range(n_models-1):
models.append(mnist_model.fork())
return models
@unittest.skip('Skipped in this version')
class DedupInputTest(unittest.TestCase):
def _build_logical_with_mnist(self, n_models: int):
lp = LogicalPlan()
models = _load_mnist(n_models=n_models)
for m in models:
lp.add_model(m)
return lp, models
def _test_add_model(self):
lp, models = self._build_logical_with_mnist(3)
for node in lp.logical_graph.hidden_nodes:
old_nodes = [m.root_graph.get_node_by_id(node.id) for m in models]
self.assertTrue(any([old_nodes[0].__repr__() == Node.__repr__(x) for x in old_nodes]))
def test_dedup_input(self):
os.environ['CGO'] = 'true'
lp, models = self._build_logical_with_mnist(3)
opt = DedupInputOptimizer()
opt.convert(lp)
with open('dedup_logical_graph.json', 'r') as fp:
correct_dump = fp.readlines()
lp_dump = lp.logical_graph._dump()
self.assertTrue(correct_dump[0] == json.dumps(lp_dump))
advisor = RetiariiAdvisor()
cgo = CGOExecutionEngine()
phy_models = cgo._assemble(lp)
self.assertTrue(len(phy_models) == 1)
# logging.info(phy_models[0][0]._dump())
# script=model_to_pytorch_script(phy_models[0][0], placement = phy_models[0][1])
# logging.info(script)
# with open('generated/debug_dedup_input.py', 'w') as fp:
# fp.write(script)
# sys.path.insert(0, 'generated')
# multi_model = import_('debug_dedup_input.logical_0')
# trainer = PyTorchMultiModelTrainer(
# multi_model(), phy_models[0][0].evaluator.kwargs
# )
# trainer.fit()
advisor.stopping = True
advisor.default_worker.join()
advisor.assessor_worker.join()
if __name__ == '__main__':
unittest.main()
@@ -22,6 +22,8 @@ class EngineTest(unittest.TestCase):
self.assertEqual(script.strip(), reference_script.strip())
def test_base_execution_engine(self):
nni.retiarii.integration_api._advisor = None
nni.retiarii.execution.api._execution_engine = None
advisor = RetiariiAdvisor()
set_execution_engine(BaseExecutionEngine())
with open(self.enclosing_dir / 'mnist_pytorch.json') as f:
@@ -33,7 +35,8 @@ class EngineTest(unittest.TestCase):
advisor.assessor_worker.join()
def test_py_execution_engine(self):
nni.retiarii.integration_api._advisor = None
nni.retiarii.execution.api._execution_engine = None
advisor = RetiariiAdvisor()
set_execution_engine(PurePythonExecutionEngine())
model = Model._load({