Unverified Commit 4784cc6c authored by liuzhe-lz's avatar liuzhe-lz Committed by GitHub
Browse files

Merge pull request #3302 from microsoft/v2.0-merge

Merge branch v2.0 into master (no squash)
parents 25db55ca 349ead41
......@@ -7,9 +7,9 @@ import torch.nn as torch_nn
import ops
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
from nni.retiarii import blackbox_module
@blackbox_module
class AuxiliaryHead(nn.Module):
""" Auxiliary head in 2/3 place of network to let the gradient flow well """
......@@ -35,7 +35,6 @@ class AuxiliaryHead(nn.Module):
logits = self.linear(out)
return logits
@register_module()
class Node(nn.Module):
def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
super().__init__()
......@@ -55,7 +54,7 @@ class Node(nn.Module):
ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False)
]))
self.drop_path = ops.DropPath()
self.input_switch = nn.InputChoice(n_chosen=2)
self.input_switch = nn.InputChoice(n_candidates=num_prev_nodes, n_chosen=2)
def forward(self, prev_nodes: List['Tensor']) -> 'Tensor':
#assert self.ops.__len__() == len(prev_nodes)
......@@ -66,7 +65,6 @@ class Node(nn.Module):
#out = [self.drop_path(o) if o is not None else None for o in out]
return self.input_switch(out)
@register_module()
class Cell(nn.Module):
def __init__(self, n_nodes, channels_pp, channels_p, channels, reduction_p, reduction):
......@@ -100,7 +98,6 @@ class Cell(nn.Module):
output = torch.cat(new_tensors, dim=1)
return output
@register_module()
class CNN(nn.Module):
def __init__(self, input_size, in_channels, channels, n_classes, n_layers, n_nodes=4,
......
import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
from nni.retiarii import blackbox_module
@register_module()
@blackbox_module
class DropPath(nn.Module):
def __init__(self, p=0.):
"""
......@@ -12,7 +12,7 @@ class DropPath(nn.Module):
p : float
Probability of an path to be zeroed.
"""
super(DropPath, self).__init__()
super().__init__()
self.p = p
def forward(self, x):
......@@ -24,13 +24,13 @@ class DropPath(nn.Module):
return x
@register_module()
@blackbox_module
class PoolBN(nn.Module):
"""
AvgPool or MaxPool with BN. `pool_type` must be `max` or `avg`.
"""
def __init__(self, pool_type, C, kernel_size, stride, padding, affine=True):
super(PoolBN, self).__init__()
super().__init__()
if pool_type.lower() == 'max':
self.pool = nn.MaxPool2d(kernel_size, stride, padding)
elif pool_type.lower() == 'avg':
......@@ -45,13 +45,13 @@ class PoolBN(nn.Module):
out = self.bn(out)
return out
@register_module()
@blackbox_module
class StdConv(nn.Module):
"""
Standard conv: ReLU - Conv - BN
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(StdConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_out, kernel_size, stride, padding, bias=False),
......@@ -61,13 +61,13 @@ class StdConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class FacConv(nn.Module):
"""
Factorized conv: ReLU - Conv(Kx1) - Conv(1xK) - BN
"""
def __init__(self, C_in, C_out, kernel_length, stride, padding, affine=True):
super(FacConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_in, (kernel_length, 1), stride, padding, bias=False),
......@@ -78,7 +78,7 @@ class FacConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class DilConv(nn.Module):
"""
(Dilated) depthwise separable conv.
......@@ -86,7 +86,7 @@ class DilConv(nn.Module):
If dilation == 2, 3x3 conv => 5x5 receptive field, 5x5 conv => 9x9 receptive field.
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
super(DilConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_in, kernel_size, stride, padding, dilation=dilation, groups=C_in,
......@@ -98,14 +98,14 @@ class DilConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class SepConv(nn.Module):
"""
Depthwise separable conv.
DilConv(dilation=1) * 2.
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(SepConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
DilConv(C_in, C_in, kernel_size, stride, padding, dilation=1, affine=affine),
DilConv(C_in, C_out, kernel_size, 1, padding, dilation=1, affine=affine)
......@@ -114,13 +114,13 @@ class SepConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class FactorizedReduce(nn.Module):
"""
Reduce feature map size by factorized pointwise (stride=2).
"""
def __init__(self, C_in, C_out, affine=True):
super(FactorizedReduce, self).__init__()
super().__init__()
self.relu = nn.ReLU()
self.conv1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.conv2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
......
......@@ -5,7 +5,7 @@ import torch
from pathlib import Path
from nni.retiarii.experiment import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategies import TPEStrategy
from nni.retiarii.strategies import TPEStrategy, RandomStrategy
from nni.retiarii.trainer import PyTorchImageClassificationTrainer
from darts_model import CNN
......@@ -13,12 +13,13 @@ from darts_model import CNN
if __name__ == '__main__':
base_model = CNN(32, 3, 16, 10, 8)
trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="CIFAR10",
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
simple_startegy = TPEStrategy()
#simple_startegy = TPEStrategy()
simple_startegy = RandomStrategy()
exp = RetiariiExperiment(base_model, trainer, [], simple_startegy)
......@@ -30,4 +31,4 @@ if __name__ == '__main__':
exp_config.training_service.use_active_gpu = True
exp_config.training_service.gpu_indices = [1, 2]
exp.run(exp_config, 8081, debug=True)
exp.run(exp_config, 8081)
import json
import numpy as np
import os
import sys
import torch
import torch.nn as nn
from pathlib import Path
from torchvision import transforms
from torchvision.datasets import CIFAR10
from nni.retiarii.experiment import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategies import TPEStrategy
from nni.retiarii.trainer.pytorch import DartsTrainer
from darts_model import CNN
class Cutout(object):
    """Cutout augmentation: zero out one random square patch of an image.

    See DeVries & Taylor, "Improved Regularization of Convolutional
    Neural Networks with Cutout" (arXiv:1708.04552).
    """

    def __init__(self, length):
        # Side length of the square patch to erase.
        self.length = length

    def __call__(self, img):
        """Mask a random square of ``img`` (C x H x W tensor) in place and return it."""
        height, width = img.size(1), img.size(2)
        mask = np.ones((height, width), np.float32)

        # Patch center, drawn in the same order as before (y first, then x)
        # so RNG consumption is unchanged.
        center_y = np.random.randint(height)
        center_x = np.random.randint(width)
        half = self.length // 2

        top = np.clip(center_y - half, 0, height)
        bottom = np.clip(center_y + half, 0, height)
        left = np.clip(center_x - half, 0, width)
        right = np.clip(center_x + half, 0, width)
        mask[top: bottom, left: right] = 0.

        patch = torch.from_numpy(mask).expand_as(img)
        img *= patch  # in-place: broadcast the single-channel mask over all channels
        return img
def get_dataset(cls, cutout_length=0):
    """Build the (train, valid) dataset pair with standard CIFAR-10 preprocessing.

    Parameters
    ----------
    cls : str
        Dataset name; only ``"cifar10"`` is supported.
    cutout_length : int
        If positive, a ``Cutout`` of this size is appended to the training
        transform chain.

    Returns
    -------
    tuple
        ``(dataset_train, dataset_valid)``.

    Raises
    ------
    NotImplementedError
        For any ``cls`` other than ``"cifar10"``.
    """
    MEAN = [0.49139968, 0.48215827, 0.44653124]
    STD = [0.24703233, 0.24348505, 0.26158768]
    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalization = [
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD),
    ]
    # Cutout is applied only to training data, after normalization.
    extra = [Cutout(cutout_length)] if cutout_length > 0 else []

    train_transform = transforms.Compose(augmentation + normalization + extra)
    valid_transform = transforms.Compose(normalization)

    if cls != "cifar10":
        raise NotImplementedError
    dataset_train = CIFAR10(root="./data/cifar10", train=True, download=True, transform=train_transform)
    dataset_valid = CIFAR10(root="./data/cifar10", train=False, download=True, transform=valid_transform)
    return dataset_train, dataset_valid
def accuracy(output, target, topk=(1,)):
    """Compute precision@k for each k in ``topk``.

    Parameters
    ----------
    output : torch.Tensor
        Scores/logits of shape ``(batch, n_classes)``.
    target : torch.Tensor
        Ground-truth labels of shape ``(batch,)``, or one-hot of shape
        ``(batch, n_classes)``.
    topk : tuple of int
        The k values to evaluate.

    Returns
    -------
    dict
        Maps ``"acc{k}"`` to the fraction of samples whose true label is
        among the top-k predictions.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()

    # one-hot case: convert to class indices
    if target.ndimension() > 1:
        target = target.max(1)[1]

    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = dict()
    for k in topk:
        # Fix: `correct` is non-contiguous (it comes from `pred.t()`), so
        # `.view(-1)` raises RuntimeError on recent PyTorch; `reshape` copies
        # when necessary and always works.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res["acc{}".format(k)] = correct_k.mul_(1.0 / batch_size).item()
    return res
if __name__ == '__main__':
    # DARTS search space: 32x32 inputs, 3 input channels, 16 initial channels,
    # 10 classes, 8 cells.
    base_model = CNN(32, 3, 16, 10, 8)
    dataset_train, dataset_valid = get_dataset("cifar10")
    criterion = nn.CrossEntropyLoss()
    # Hyper-parameters follow the original DARTS paper (SGD + cosine annealing).
    optim = torch.optim.SGD(base_model.parameters(), 0.025, momentum=0.9, weight_decay=3.0E-4)
    # NOTE(review): lr_scheduler is never stepped in this script; presumably the
    # trainer drives it — confirm, otherwise the learning rate stays constant.
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, 50, eta_min=0.001)
    trainer = DartsTrainer(
        model=base_model,
        loss=criterion,
        metrics=lambda output, target: accuracy(output, target, topk=(1,)),
        optimizer=optim,
        num_epochs=50,
        dataset=dataset_train,
        batch_size=32,
        log_frequency=10,
        unrolled=False  # first-order approximation of the architecture gradient
    )
    # One-shot search: the DARTS trainer performs the search itself, so no
    # extra mutators or strategy are passed to the experiment.
    exp = RetiariiExperiment(base_model, trainer)
    exp.run()
from nni.retiarii import blackbox_module
import nni.retiarii.nn.pytorch as nn
import warnings
import torch
......@@ -8,8 +10,6 @@ import torch.nn.functional as F
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is
# 1.0 - tensorflow.
......@@ -27,6 +27,7 @@ class _ResidualBlock(nn.Module):
def forward(self, x):
return self.net(x) + x
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
......@@ -110,7 +111,7 @@ def _get_depths(depths, alpha):
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
@register_module()
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
......@@ -127,7 +128,7 @@ class MNASNet(nn.Module):
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
skips, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
super().__init__()
assert alpha > 0.0
assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
self.alpha = alpha
......@@ -143,22 +144,22 @@ class MNASNet(nn.Module):
nn.ReLU(inplace=True),
]
count = 0
#for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
# TODO: restrict that "choose" can only be used within mutator
ph = nn.Placeholder(label=f'mutable_{count}', related_info={
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
#'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
# 'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
})
layers.append(ph)
'''if conv == "mconv":
......@@ -185,7 +186,7 @@ class MNASNet(nn.Module):
#self.for_test = 10
def forward(self, x):
#if self.for_test == 10:
# if self.for_test == 10:
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
......@@ -196,7 +197,7 @@ class MNASNet(nn.Module):
for m in self.modules():
if isinstance(m, nn.Conv2d):
torch_nn.init.kaiming_normal_(m.weight, mode="fan_out",
nonlinearity="relu")
nonlinearity="relu")
if m.bias is not None:
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
......@@ -204,16 +205,18 @@ class MNASNet(nn.Module):
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out",
nonlinearity="sigmoid")
nonlinearity="sigmoid")
torch_nn.init.zeros_(m.bias)
def test_model(model):
model(torch.randn(2, 3, 224, 224))
#====================definition of candidate op classes
# ====================definition of candidate op classes
BN_MOMENTUM = 1 - 0.9997
class RegularConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -234,6 +237,7 @@ class RegularConv(nn.Module):
out = out + x
return out
class DepthwiseConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -257,6 +261,7 @@ class DepthwiseConv(nn.Module):
out = out + x
return out
class MobileConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -274,7 +279,7 @@ class MobileConv(nn.Module):
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding= (kernel_size - 1) // 2,
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=(kernel_size - 1) // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
......@@ -288,5 +293,6 @@ class MobileConv(nn.Module):
out = out + x
return out
# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
\ No newline at end of file
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
......@@ -19,12 +19,12 @@ if __name__ == '__main__':
_DEFAULT_NUM_LAYERS = [1, 3, 3, 3, 2, 4, 1]
base_model = MNASNet(0.5, _DEFAULT_DEPTHS, _DEFAULT_CONVOPS, _DEFAULT_KERNEL_SIZES,
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="CIFAR10",
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
# new interface
applied_mutators = []
......@@ -41,4 +41,4 @@ if __name__ == '__main__':
exp_config.max_trial_number = 10
exp_config.training_service.use_active_gpu = False
exp.run(exp_config, 8081, debug=True)
exp.run(exp_config, 8081)
import random
import nni.retiarii.nn.pytorch as nn
import torch.nn.functional as F
from nni.retiarii.experiment import RetiariiExeConfig, RetiariiExperiment
from nni.retiarii.strategies import RandomStrategy
from nni.retiarii.trainer import PyTorchImageClassificationTrainer
class Net(nn.Module):
    """LeNet-style MNIST search space with one mutable fully-connected layer.

    The first FC layer is a ``LayerChoice`` between a biased and an unbiased
    ``Linear``; the convolutional stem and the classifier head are fixed.
    """

    def __init__(self, hidden_size):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # Both candidates share the same in/out shape, so the choice is
        # transparent to the rest of the forward pass.
        self.fc1 = nn.LayerChoice([
            nn.Linear(4 * 4 * 50, hidden_size),
            nn.Linear(4 * 4 * 50, hidden_size, bias=False)
        ])
        self.fc2 = nn.Linear(hidden_size, 10)

    def forward(self, x):
        # Two conv -> relu -> 2x2 max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)
        flat = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)
if __name__ == '__main__':
    base_model = Net(128)
    # Trainer handles data loading, optimization and evaluation for each trial;
    # MNIST is downloaded on first use and one epoch keeps the demo fast.
    trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="MNIST",
                                                dataset_kwargs={"root": "data/mnist", "download": True},
                                                dataloader_kwargs={"batch_size": 32},
                                                optimizer_kwargs={"lr": 1e-3},
                                                trainer_kwargs={"max_epochs": 1})
    # Random search over the (small) space defined by Net's LayerChoice.
    simple_startegy = RandomStrategy()  # NOTE(review): "startegy" is a typo; renaming would be a code change
    exp = RetiariiExperiment(base_model, trainer, [], simple_startegy)
    exp_config = RetiariiExeConfig('local')
    exp_config.experiment_name = 'mnist_search'
    exp_config.trial_concurrency = 2
    exp_config.max_trial_number = 10
    exp_config.training_service.use_active_gpu = False
    # Random port offset avoids clashes when several experiments share a host.
    exp.run(exp_config, 8081 + random.randint(0, 100))
"""
Reference: We use tested models from https://github.com/pytorch/pytorch/blob/master/test/jit/test_models.py.
"""
import os
import sys
import unittest
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import blackbox_module
from nni.retiarii.converter import convert_to_graph
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii.utils import get_records
class MnistNet(nn.Module):
    """Small LeNet-style classifier for 28x28 single-channel MNIST digits."""

    def __init__(self):
        super(MnistNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Pool-then-relu is elementwise-identical to relu-then-pool
        # (ReLU is monotonic, so it commutes with max pooling).
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        hidden = self.conv2_drop(self.conv2(x))
        x = F.relu(F.max_pool2d(hidden, 2))
        x = x.view(-1, 320)  # flatten: 20 channels x 4 x 4
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        return F.log_softmax(self.fc2(x), dim=1)
class TestConvert(unittest.TestCase):
@staticmethod
def _match_state_dict(current_values, expected_format):
result = {}
for k, v in expected_format.items():
for cv in current_values:
if cv.shape == v.shape:
result[k] = cv
current_values.remove(cv)
break
return result
def checkExportImport(self, model, input):
script_module = torch.jit.script(model)
model_ir = convert_to_graph(script_module, model)
model_code = model_to_pytorch_script(model_ir)
exec_vars = {}
exec(model_code + '\n\nconverted_model = _model()', exec_vars)
converted_model = exec_vars['converted_model']
converted_state_dict = self._match_state_dict(list(model.state_dict().values()),
dict(converted_model.state_dict()))
converted_model.load_state_dict(converted_state_dict)
with torch.no_grad():
expected_output = model.eval()(*input)
converted_output = converted_model.eval()(*input)
self.assertEqual(len(converted_output), len(expected_output))
for a, b in zip(converted_output, expected_output):
self.assertLess((a - b).abs().max().item(), 1E-4)
return converted_model
def setUp(self):
# FIXME
import nni.retiarii.debug_configs
nni.retiarii.debug_configs.framework = 'pytorch'
def test_dcgan_models(self):
class DCGANGenerator(nn.Module):
def __init__(self, nz, ngf, nc):
super(DCGANGenerator, self).__init__()
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 4 x 4
nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 8 x 8
nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 16 x 16
nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 32 x 32
nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 64 x 64
)
def forward(self, input):
return self.main(input)
class DCGANDiscriminator(nn.Module):
def __init__(self, nc, ndf):
super(DCGANDiscriminator, self).__init__()
self.main = nn.Sequential(
# input is (nc) x 64 x 64
nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 32 x 32
nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 16 x 16
nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 8 x 8
nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 8),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 4 x 4
nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
nn.Sigmoid()
)
def forward(self, input):
return self.main(input).view(-1, 1).squeeze(1)
bs, nz, ngf, nc, ndf = 5, 6, 9, 3, 10
input = (torch.rand(bs, nz, 1, 1),)
model = DCGANGenerator(nz, ngf, nc)
self.checkExportImport(model, input)
@unittest.skip('this test has a if condition that needs to be handle') # FIXME
def test_neural_style(self):
class TransformerNet(torch.nn.Module):
def __init__(self):
super(TransformerNet, self).__init__()
# Initial convolution layers
self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
# Residual layers
self.res1 = ResidualBlock(128)
self.res2 = ResidualBlock(128)
self.res3 = ResidualBlock(128)
self.res4 = ResidualBlock(128)
self.res5 = ResidualBlock(128)
# Upsampling Layers
self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
# Non-linearities
self.relu = torch.nn.ReLU()
def forward(self, X):
y = self.relu(self.in1(self.conv1(X)))
y = self.relu(self.in2(self.conv2(y)))
y = self.relu(self.in3(self.conv3(y)))
y = self.res1(y)
y = self.res2(y)
y = self.res3(y)
y = self.res4(y)
y = self.res5(y)
y = self.relu(self.in4(self.deconv1(y)))
y = self.relu(self.in5(self.deconv2(y)))
y = self.deconv3(y)
return y
class ConvLayer(torch.nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride):
super(ConvLayer, self).__init__()
reflection_padding = kernel_size // 2
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
def forward(self, x):
out = self.reflection_pad(x)
out = self.conv2d(out)
return out
class ResidualBlock(torch.nn.Module):
"""ResidualBlock
introduced in: https://arxiv.org/abs/1512.03385
recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
"""
def __init__(self, channels):
super(ResidualBlock, self).__init__()
self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
self.relu = torch.nn.ReLU()
def forward(self, x):
residual = x
out = self.relu(self.in1(self.conv1(x)))
out = self.in2(self.conv2(out))
out = out + residual
return out
class UpsampleConvLayer(torch.nn.Module):
"""UpsampleConvLayer
Upsamples the input and then does a convolution. This method gives better results
compared to ConvTranspose2d.
ref: http://distill.pub/2016/deconv-checkerboard/
"""
def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
super(UpsampleConvLayer, self).__init__()
self.upsample = upsample
if upsample:
self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
reflection_padding = kernel_size // 2
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
def forward(self, x):
x_in = x
if self.upsample:
x_in = self.upsample_layer(x_in)
out = self.reflection_pad(x_in)
out = self.conv2d(out)
return out
model = TransformerNet()
input = (torch.rand(5, 3, 16, 16),)
self.checkExportImport(model, input)
def test_mnist(self):
# eval() is present because dropout makes this nondeterministic
self.checkExportImport(MnistNet().eval(), (torch.rand(5, 1, 28, 28),))
def test_reinforcement_learning(self):
class Policy(nn.Module):
def __init__(self):
super(Policy, self).__init__()
self.affine1 = nn.Linear(4, 128)
self.affine2 = nn.Linear(128, 2)
def forward(self, x):
x = F.relu(self.affine1(x))
action_scores = self.affine2(x)
return F.softmax(action_scores, dim=1)
self.checkExportImport(Policy(), (torch.rand(1, 4),))
@unittest.skip('Replaced init error.') # FIXME
def test_snli(self):
class Bottle(nn.Module):
def forward(self, input):
if len(input.size()) <= 2:
return super(Bottle, self).forward(input)
size = input.size()[:2]
out = super(Bottle, self).forward(input.view(size[0] * size[1], -1))
return out.view(size[0], size[1], -1)
class Linear(Bottle, nn.Linear):
pass
class Encoder(nn.Module):
def __init__(self, config):
super(Encoder, self).__init__()
self.config = config
input_size = config.d_proj if config.projection else config.d_embed
dropout = 0 if config.n_layers == 1 else config.dp_ratio
self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden,
num_layers=config.n_layers, dropout=dropout,
bidirectional=config.birnn)
def forward(self, inputs):
batch_size = inputs.size()[1]
state_shape = self.config.n_cells, batch_size, self.config.d_hidden
h0 = c0 = inputs.new_zeros(state_shape)
outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)
class SNLIClassifier(nn.Module):
def __init__(self, config):
super(SNLIClassifier, self).__init__()
self.config = config
self.embed = nn.Embedding(config.n_embed, config.d_embed)
self.projection = Linear(config.d_embed, config.d_proj)
self.encoder = Encoder(config)
self.dropout = nn.Dropout(p=config.dp_ratio)
self.relu = nn.ReLU()
seq_in_size = 2 * config.d_hidden
if self.config.birnn:
seq_in_size *= 2
lin_config = [seq_in_size] * 2
self.out = nn.Sequential(
Linear(*lin_config),
self.relu,
self.dropout,
Linear(*lin_config),
self.relu,
self.dropout,
Linear(*lin_config),
self.relu,
self.dropout,
Linear(seq_in_size, config.d_out))
def forward(self, premise, hypothesis):
prem_embed = self.embed(premise)
hypo_embed = self.embed(hypothesis)
if self.config.fix_emb:
prem_embed = prem_embed.detach()
hypo_embed = hypo_embed.detach()
if self.config.projection:
prem_embed = self.relu(self.projection(prem_embed))
hypo_embed = self.relu(self.projection(hypo_embed))
premise = self.encoder(prem_embed)
hypothesis = self.encoder(hypo_embed)
scores = self.out(torch.cat([premise, hypothesis], 1))
return scores
class Config:
n_embed = 100
d_embed = 100
d_proj = 300
dp_ratio = 0.0 # For deterministic testing TODO: change by fixing seed in checkTrace?
d_hidden = 30
birnn = True
d_out = 300
fix_emb = True
projection = True
n_layers = 2
n_cells = 4 # 2 * n_layers because birnn = True
premise = torch.LongTensor(48, 64).random_(0, 100)
hypothesis = torch.LongTensor(24, 64).random_(0, 100)
self.checkExportImport(SNLIClassifier(Config()), (premise, hypothesis))
def test_super_resolution(self):
class Net(nn.Module):
def __init__(self, upscale_factor):
super(Net, self).__init__()
self.relu = nn.ReLU()
self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.relu(self.conv2(x))
x = self.relu(self.conv3(x))
x = self.pixel_shuffle(self.conv4(x))
return x
net = Net(upscale_factor=4)
self.checkExportImport(net, (torch.rand(5, 1, 32, 32),))
@unittest.skip('Need to support operator prim::ListUnpack') # FIXME
def test_time_sequence_prediction(self):
class Sequence(torch.jit.ScriptModule):
def __init__(self):
super(Sequence, self).__init__()
self.lstm1 = nn.LSTMCell(1, 51)
self.lstm2 = nn.LSTMCell(51, 51)
self.linear = nn.Linear(51, 1)
@torch.jit.script_method
def forward(self, input):
# TODO: add future as input with default val
# see https://github.com/pytorch/pytorch/issues/8724
outputs = torch.empty((3, 0))
h_t = torch.zeros((3, 51))
c_t = torch.zeros((3, 51))
h_t2 = torch.zeros((3, 51))
c_t2 = torch.zeros((3, 51))
output = torch.zeros([3, 51])
future = 2
# TODO: chunk call should appear as the for loop iterable
# We hard-code it to 4 for now.
a, b, c, d = input.chunk(input.size(1), dim=1)
for input_t in (a, b, c, d):
h_t, c_t = self.lstm1(input_t, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs = torch.cat((outputs, output), 1)
for _ in range(future): # if we should predict the future
h_t, c_t = self.lstm1(output, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs = torch.cat((outputs, output), 1)
return outputs
class Traced(nn.Module):
def __init__(self):
super(Traced, self).__init__()
self.seq = Sequence()
def forward(self, input):
return self.seq.forward(input)
self.checkExportImport(Traced(), (torch.rand(3, 4),))
@unittest.skip('Unsupported callmethod encode') # FIXME
def test_vae(self):
class VAE(nn.Module):
def __init__(self):
super(VAE, self).__init__()
self.fc1 = nn.Linear(784, 400)
self.fc21 = nn.Linear(400, 20)
self.fc22 = nn.Linear(400, 20)
self.fc3 = nn.Linear(20, 400)
self.fc4 = nn.Linear(400, 784)
def encode(self, x):
h1 = F.relu(self.fc1(x))
return self.fc21(h1), self.fc22(h1)
def reparameterize(self, mu, logvar):
if self.training:
std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
return eps.mul(std).add_(mu)
else:
return mu
def decode(self, z):
h3 = F.relu(self.fc3(z))
return torch.sigmoid(self.fc4(h3))
def forward(self, x):
mu, logvar = self.encode(x.view(-1, 784))
z = self.reparameterize(mu, logvar)
return self.decode(z), mu, logvar
self.checkExportImport(VAE().eval(), (torch.rand(128, 1, 28, 28),))
@unittest.skip('torchvision models are not supported yet') # FIXME
def test_torchvision_resnet18(self):
self.checkExportImport(torchvision.models.resnet18().eval(), (torch.ones(1, 3, 224, 224),))
@unittest.skip('Unsupported CallMethod _forward_impl') # FIXME
def test_resnet(self):
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(torch.jit.ScriptModule):
expansion = 1
__constants__ = ['downsample']
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
@torch.jit.script_method
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(torch.jit.ScriptModule):
    """TorchScript version of the torchvision ResNet backbone.

    Stem (7x7 conv, BN, ReLU, max-pool) followed by four residual
    stages built from ``block``, then global average pooling and a
    linear classifier head.
    """

    # Stage modules referenced from the scripted forward.
    __constants__ = ['layer1', 'layer2', 'layer3', 'layer4']

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Running channel count, updated by _make_layer as stages grow.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stages 2-4 halve the spatial resolution via stride=2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Standard ResNet weight init: He init for convs, unit/zero BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks.

        A 1x1 projection downsample is added to the first block when the
        stride or channel count changes, so the skip connection matches.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        # Only the first block in a stage strides / downsamples.
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    @torch.jit.script_method
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Flatten (N, C, 1, 1) to (N, C) for the classifier.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2])
self.checkExportImport(torchvision.models.resnet18().eval(), (torch.randn(1, 3, 224, 224),))
@unittest.skip('torchvision models are not supported yet')  # FIXME
def test_alexnet(self):
    """Round-trip AlexNet through export/import (currently skipped)."""
    model = torchvision.models.AlexNet()
    dummy_input = torch.ones(1, 3, 224, 224)
    self.checkExportImport(model, (dummy_input,))
......@@ -2,6 +2,7 @@
# Licensed under the MIT license.
import os
import psutil
import sys
import numpy as np
import torch
......@@ -128,6 +129,18 @@ def generate_random_sparsity(model):
'sparsity': sparsity})
return cfg_list
def generate_random_sparsity_v2(model):
    """Build a pruning config list covering roughly half of the Conv2d layers.

    Each Conv2d module is selected independently with probability 0.5;
    every selected layer gets a sparsity drawn uniformly from [0.5, 0.99).
    Non-Conv2d modules are ignored.
    """
    config_list = []
    for layer_name, layer in model.named_modules():
        if not isinstance(layer, nn.Conv2d):
            continue
        # Coin flip: skip this layer about half of the time.
        if np.random.uniform(0, 1.0) <= 0.5:
            continue
        config_list.append({
            'op_types': ['Conv2d'],
            'op_names': [layer_name],
            'sparsity': np.random.uniform(0.5, 0.99),
        })
    return config_list
def zero_bn_bias(model):
with torch.no_grad():
......@@ -292,52 +305,62 @@ class SpeedupTestCase(TestCase):
# Example: https://msrasrg.visualstudio.com/NNIOpenSource/_build/results?buildId=16282
def test_speedup_integration(self):
for model_name in ['resnet18', 'squeezenet1_1',
'mobilenet_v2', 'densenet121',
# skip this test on windows(7GB mem available) due to memory limit
# Note: hack trick, may be updated in the future
if 'win' in sys.platform or 'Win'in sys.platform:
print('Skip test_speedup_integration on windows due to memory limit!')
return
Gen_cfg_funcs = [generate_random_sparsity, generate_random_sparsity_v2]
for model_name in ['resnet18', 'mobilenet_v2', 'squeezenet1_1', 'densenet121' , 'densenet169',
# 'inception_v3' inception is too large and may fail the pipeline
'densenet169', 'resnet50']:
kwargs = {
'pretrained': True
}
if model_name == 'resnet50':
# testing multiple groups
'resnet50']:
for gen_cfg_func in Gen_cfg_funcs:
kwargs = {
'pretrained': False,
'groups': 4
'pretrained': True
}
if model_name == 'resnet50':
# testing multiple groups
kwargs = {
'pretrained': False,
'groups': 4
}
Model = getattr(models, model_name)
net = Model(**kwargs).to(device)
speedup_model = Model(**kwargs).to(device)
net.eval() # this line is necessary
speedup_model.eval()
# random generate the prune config for the pruner
cfgs = gen_cfg_func(net)
print("Testing {} with compression config \n {}".format(model_name, cfgs))
pruner = L1FilterPruner(net, cfgs)
pruner.compress()
pruner.export_model(MODEL_FILE, MASK_FILE)
pruner._unwrap_model()
state_dict = torch.load(MODEL_FILE)
speedup_model.load_state_dict(state_dict)
zero_bn_bias(net)
zero_bn_bias(speedup_model)
data = torch.ones(BATCH_SIZE, 3, 128, 128).to(device)
ms = ModelSpeedup(speedup_model, data, MASK_FILE)
ms.speedup_model()
speedup_model.eval()
ori_out = net(data)
speeded_out = speedup_model(data)
ori_sum = torch.sum(ori_out).item()
speeded_sum = torch.sum(speeded_out).item()
print('Sum of the output of %s (before speedup):' %
model_name, ori_sum)
print('Sum of the output of %s (after speedup):' %
model_name, speeded_sum)
assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
Model = getattr(models, model_name)
net = Model(**kwargs).to(device)
speedup_model = Model(**kwargs).to(device)
net.eval() # this line is necessary
speedup_model.eval()
# random generate the prune config for the pruner
cfgs = generate_random_sparsity(net)
pruner = L1FilterPruner(net, cfgs)
pruner.compress()
pruner.export_model(MODEL_FILE, MASK_FILE)
pruner._unwrap_model()
state_dict = torch.load(MODEL_FILE)
speedup_model.load_state_dict(state_dict)
zero_bn_bias(net)
zero_bn_bias(speedup_model)
data = torch.ones(BATCH_SIZE, 3, 128, 128).to(device)
ms = ModelSpeedup(speedup_model, data, MASK_FILE)
ms.speedup_model()
speedup_model.eval()
ori_out = net(data)
speeded_out = speedup_model(data)
ori_sum = torch.sum(ori_out).item()
speeded_sum = torch.sum(speeded_out).item()
print('Sum of the output of %s (before speedup):' %
model_name, ori_sum)
print('Sum of the output of %s (after speedup):' %
model_name, speeded_sum)
assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
def test_channel_prune(self):
orig_net = resnet18(num_classes=10).to(device)
......@@ -369,8 +392,10 @@ class SpeedupTestCase(TestCase):
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
def tearDown(self):
os.remove(MODEL_FILE)
os.remove(MASK_FILE)
if os.path.exists(MODEL_FILE):
os.remove(MODEL_FILE)
if os.path.exists(MASK_FILE):
os.remove(MASK_FILE)
if __name__ == '__main__':
......
......@@ -344,16 +344,19 @@ function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newE
let cmd: string = command;
let arg: string[] = [];
let newShell: boolean = true;
let isDetached: boolean = false;
if (process.platform === "win32") {
cmd = command.split(" ", 1)[0];
arg = command.substr(cmd.length + 1).split(" ");
newShell = false;
isDetached = true;
}
const tunerProc: ChildProcess = spawn(cmd, arg, {
stdio,
cwd: newCwd,
env: newEnv,
shell: newShell
shell: newShell,
detached: isDetached
});
return tunerProc;
}
......@@ -434,8 +437,8 @@ function withLockSync(func: Function, filePath: string, lockOpts: {[key: string]
const lockPath = path.join(path.dirname(filePath), path.basename(filePath) + '.lock.*');
const lockFileNames: string[] = glob.sync(lockPath);
const canLock: boolean = lockFileNames.map((fileName) => {
return fs.existsSync(fileName) && Date.now() - fs.statSync(fileName).mtimeMs > lockOpts.stale;
}).filter(isExpired=>isExpired === false).length === 0;
return fs.existsSync(fileName) && Date.now() - fs.statSync(fileName).mtimeMs < lockOpts.stale;
}).filter(unexpired=>unexpired === true).length === 0;
if (!canLock) {
throw new Error('File has been locked.');
}
......
......@@ -450,15 +450,17 @@ class NNIManager implements Manager {
throw new Error('Error: tuner has not been setup');
}
this.trainingService.removeTrialJobMetricListener(this.trialJobMetricListener);
this.dispatcher.sendCommand(TERMINATE);
let tunerAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) {
if (!tunerAlive) { break; }
tunerAlive = await isAlive(this.dispatcherPid);
await delay(1000);
}
await killPid(this.dispatcherPid);
if (this.dispatcherPid > 0) {
this.dispatcher.sendCommand(TERMINATE);
let tunerAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) {
if (!tunerAlive) { break; }
tunerAlive = await isAlive(this.dispatcherPid);
await delay(1000);
}
await killPid(this.dispatcherPid);
}
const trialJobList: TrialJobDetail[] = await this.trainingService.listTrialJobs();
// DON'T try to make it in parallel, the training service may not handle it well.
......@@ -480,7 +482,6 @@ class NNIManager implements Manager {
}
await this.storeExperimentProfile();
this.setStatus('STOPPED');
this.experimentManager.setExperimentInfo(this.experimentProfile.id, 'port', undefined);
}
private async periodicallyUpdateExecDuration(): Promise<void> {
......
......@@ -37,7 +37,7 @@ function initStartupInfo(
}
async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
const routerPlatformMode = ['remote', 'pai', 'aml', 'heterogeneous'];
const routerPlatformMode = ['remote', 'pai', 'aml', 'hybrid'];
if (routerPlatformMode.includes(platformMode)) {
Container.bind(TrainingService)
.to(RouterTrainingService)
......@@ -97,7 +97,7 @@ async function initContainer(foreground: boolean, platformMode: string, logFileN
function usage(): void {
console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/heterogeneous> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/hybrid> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
}
const strPort: string = parseArg(['--port', '-p']);
......@@ -117,7 +117,7 @@ const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : fals
const port: number = parseInt(strPort, 10);
const mode: string = parseArg(['--mode', '-m']);
if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'].includes(mode)) {
if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'].includes(mode)) {
console.log(`FATAL: unknown mode: ${mode}`);
usage();
process.exit(1);
......
......@@ -21,7 +21,7 @@
"lockfile": "^1.0.4",
"python-shell": "^2.0.1",
"rx": "^4.1.0",
"sqlite3": "^5.0.0",
"sqlite3": "5.0.0",
"ssh2": "^0.6.1",
"stream-buffers": "^3.0.2",
"tail-stream": "^0.3.4",
......@@ -68,17 +68,19 @@
},
"resolutions": {
"mem": "^4.0.0",
"lodash": "^4.17.13",
"lodash.merge": "^4.6.2",
"lodash": ">=4.17.13",
"lodash.merge": ">=4.6.2",
"node.extend": "^1.1.7",
"hoek": "^4.2.1",
"js-yaml": "^3.13.1",
"acorn": ">=7.1.1",
"node-forge": "^0.10.0",
"node-forge": ">=0.10.0",
"dot-prop": "^4.2.1",
"npm": ">=6.14.8",
"yargs": ">=16.0.3",
"yargs-parser": ">=20.2.0"
"yargs-parser": ">=20.2.0",
"y18n": ">=5.0.5",
"acorn": ">=8.0.4",
"serialize-javascript": ">=5.0.1"
},
"engines": {
"node": ">=10.0.0"
......
......@@ -183,7 +183,7 @@ export namespace ValidationSchemas {
maxTrialNumPerGpu: joi.number(),
useActiveGpu: joi.boolean()
}),
heterogeneous_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
hybrid_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
trainingServicePlatforms: joi.array(),
}),
nni_manager_ip: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
......
......@@ -11,7 +11,7 @@ export enum TrialConfigMetadataKey {
LOCAL_CONFIG = 'local_config',
TRIAL_CONFIG = 'trial_config',
REMOTE_CONFIG = 'remote_config',
HETEROGENEOUS_CONFIG = 'heterogeneous_config',
HYBRID_CONFIG = 'hybrid_config',
EXPERIMENT_ID = 'experimentId',
MULTI_PHASE = 'multiPhase',
RANDOM_SCHEDULER = 'random_scheduler',
......@@ -24,7 +24,7 @@ export enum TrialConfigMetadataKey {
AML_CLUSTER_CONFIG = 'aml_config',
VERSION_CHECK = 'version_check',
LOG_COLLECTION = 'log_collection',
// Used to set platform for heterogeneous in reuse mode,
// Used to set platform for hybrid in reuse mode,
// temproarily change and will refactor config schema in the future
PLATFORM_LIST = 'platform_list'
}
......@@ -271,9 +271,9 @@ ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \
environment.command, { encoding: 'utf8' });
// Copy files in codeDir to remote working directory
await executor.copyDirectoryToRemote(environmentLocalTempFolder, environment.runnerWorkingFolder);
// Execute command in remote machine
// Execute command in remote machine, set isInteractive=true to run script in conda environment
executor.executeScript(executor.joinPath(environment.runnerWorkingFolder,
executor.getScriptName("run")), true, false);
executor.getScriptName("run")), true, true);
if (environment.rmMachineMeta === undefined) {
throw new Error(`${environment.id} rmMachineMeta not initialized!`);
}
......
......@@ -95,8 +95,8 @@ class RouterTrainingService implements TrainingService {
public async setClusterMetadata(key: string, value: string): Promise<void> {
if (this.internalTrainingService === undefined) {
// Need to refactor configuration, remove heterogeneous_config field in the future
if (key === TrialConfigMetadataKey.HETEROGENEOUS_CONFIG){
// Need to refactor configuration, remove hybrid_config field in the future
if (key === TrialConfigMetadataKey.HYBRID_CONFIG){
this.internalTrainingService = component.get(TrialDispatcher);
const heterogenousConfig: HeterogenousConfig = <HeterogenousConfig>JSON.parse(value);
if (this.internalTrainingService === undefined) {
......
......@@ -529,9 +529,10 @@ acorn-jsx@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.1.0.tgz#294adb71b57398b0680015f0a38c563ee1db5384"
acorn@>=7.1.1, acorn@^7.1.0:
version "7.1.1"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.1.1.tgz#e35668de0b402f359de515c5482a1ab9f89a69bf"
acorn@>=8.0.4, acorn@^7.1.0:
version "8.0.4"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.0.4.tgz#7a3ae4191466a6984eee0fe3407a4f3aa9db8354"
integrity sha512-XNP0PqF1XD19ZlLKvB7cMmnZswW4C/03pRHgirB30uSJTaS3A3V1/P4sS3HPvFmjoriPCJQs+JDSbm4bL1TxGQ==
agent-base@4, agent-base@^4.3.0:
version "4.3.0"
......@@ -3122,9 +3123,10 @@ lodash.intersection@^4.4.0:
version "4.4.0"
resolved "https://registry.yarnpkg.com/lodash.intersection/-/lodash.intersection-4.4.0.tgz#0a11ba631d0e95c23c7f2f4cbb9a692ed178e705"
lodash.merge@^4.6.1, lodash.merge@^4.6.2:
lodash.merge@>=4.6.2, lodash.merge@^4.6.1:
version "4.6.2"
resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==
lodash.omit@^4.5.0:
version "4.5.0"
......@@ -3154,10 +3156,10 @@ lodash.without@~4.4.0:
version "4.4.0"
resolved "https://registry.yarnpkg.com/lodash.without/-/lodash.without-4.4.0.tgz#3cd4574a00b67bae373a94b748772640507b7aac"
lodash@^4.17.11, lodash@^4.17.13, lodash@^4.17.14, lodash@^4.17.15:
version "4.17.19"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.19.tgz#e48ddedbe30b3321783c5b4301fbd353bc1e4a4b"
integrity sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==
lodash@>=4.17.13, lodash@^4.17.11, lodash@^4.17.13, lodash@^4.17.14, lodash@^4.17.15:
version "4.17.20"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.20.tgz#b44a9b6297bcb698f1c51a3545a2b3b368d59c52"
integrity sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==
log-symbols@4.0.0:
version "4.0.0"
......@@ -3476,7 +3478,7 @@ node-fetch-npm@^2.0.2:
json-parse-better-errors "^1.0.0"
safe-buffer "^5.1.1"
node-forge@^0.10.0, node-forge@^0.7.6:
node-forge@>=0.10.0, node-forge@^0.7.6:
version "0.10.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
......@@ -4836,10 +4838,10 @@ send@0.16.2:
range-parser "~1.2.0"
statuses "~1.4.0"
serialize-javascript@4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-4.0.0.tgz#b525e1238489a5ecfc42afacc3fe99e666f4b1aa"
integrity sha512-GaNA54380uFefWghODBWEGisLZFj00nS5ACs6yHa9nLqlLpVLO8ChDGeKRjZnV4Nh4n0Qi7nhYZD/9fCPzEqkw==
serialize-javascript@4.0.0, serialize-javascript@>=5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
integrity sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==
dependencies:
randombytes "^2.1.0"
......@@ -5006,7 +5008,7 @@ sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
sqlite3@^5.0.0:
sqlite3@5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/sqlite3/-/sqlite3-5.0.0.tgz#1bfef2151c6bc48a3ab1a6c126088bb8dd233566"
integrity sha512-rjvqHFUaSGnzxDy2AHCwhHy6Zp6MNJzCPGYju4kD8yi6bze4d1/zMTg6C7JI49b7/EM7jKMTvyfN/4ylBKdwfw==
......@@ -5725,15 +5727,10 @@ xtend@~4.0.1:
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==
y18n@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.0.tgz#95ef94f85ecc81d007c264e190a120f0a3c8566b"
integrity sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==
y18n@^5.0.2:
version "5.0.4"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.4.tgz#0ab2db89dd5873b5ec4682d8e703e833373ea897"
integrity sha512-deLOfD+RvFgrpAmSZgfGdWYE+OKyHcVHaRQ7NphG/63scpRvTHHeQMAxGGvaLVGJ+HYVcCXlzcTK0ZehFf+eHQ==
y18n@>=5.0.5, y18n@^4.0.0, y18n@^5.0.2:
version "5.0.5"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.5.tgz#8769ec08d03b1ea2df2500acef561743bbb9ab18"
integrity sha512-hsRUr4FFrvhhRH12wOdfs38Gy7k2FFzB9qgN9v3aLykRq0dRcdcpz5C9FxdS2NuhOrI/628b/KSTJ3rwHysYSg==
yallist@^2.1.2:
version "2.1.2"
......
......@@ -46,7 +46,6 @@
"react-json-tree": "^0.11.2",
"react-monaco-editor": "^0.32.1",
"react-paginate": "^6.3.2",
"react-pagination": "^1.0.0",
"react-responsive": "^8.1.1",
"react-router": "^5.2.0",
"react-router-dom": "^5.2.0",
......@@ -115,6 +114,10 @@
},
"resolutions": {
"npm": ">=6.14.4",
"yargs": ">=16.0.3"
"yargs": ">=16.0.3",
"acorn": ">=8.0.4",
"node-forge": ">=0.10.0",
"y18n": ">=5.0.5",
"serialize-javascript": ">=5.0.1"
}
}
......@@ -36,7 +36,6 @@
/* nav bar: 56 + marginTop: 24 */
margin-top: 80px;
margin-bottom: 30px;
}
.bottomDiv {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment