Unverified Commit 4784cc6c authored by liuzhe-lz's avatar liuzhe-lz Committed by GitHub
Browse files

Merge pull request #3302 from microsoft/v2.0-merge

Merge branch v2.0 into master (no squash)
parents 25db55ca 349ead41
......@@ -7,9 +7,9 @@ import torch.nn as torch_nn
import ops
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
from nni.retiarii import blackbox_module
@blackbox_module
class AuxiliaryHead(nn.Module):
""" Auxiliary head in 2/3 place of network to let the gradient flow well """
......@@ -35,7 +35,6 @@ class AuxiliaryHead(nn.Module):
logits = self.linear(out)
return logits
@register_module()
class Node(nn.Module):
def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
super().__init__()
......@@ -55,7 +54,7 @@ class Node(nn.Module):
ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False)
]))
self.drop_path = ops.DropPath()
self.input_switch = nn.InputChoice(n_chosen=2)
self.input_switch = nn.InputChoice(n_candidates=num_prev_nodes, n_chosen=2)
def forward(self, prev_nodes: List['Tensor']) -> 'Tensor':
#assert self.ops.__len__() == len(prev_nodes)
......@@ -66,7 +65,6 @@ class Node(nn.Module):
#out = [self.drop_path(o) if o is not None else None for o in out]
return self.input_switch(out)
@register_module()
class Cell(nn.Module):
def __init__(self, n_nodes, channels_pp, channels_p, channels, reduction_p, reduction):
......@@ -100,7 +98,6 @@ class Cell(nn.Module):
output = torch.cat(new_tensors, dim=1)
return output
@register_module()
class CNN(nn.Module):
def __init__(self, input_size, in_channels, channels, n_classes, n_layers, n_nodes=4,
......
import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
from nni.retiarii import blackbox_module
@register_module()
@blackbox_module
class DropPath(nn.Module):
def __init__(self, p=0.):
"""
......@@ -12,7 +12,7 @@ class DropPath(nn.Module):
p : float
Probability of an path to be zeroed.
"""
super(DropPath, self).__init__()
super().__init__()
self.p = p
def forward(self, x):
......@@ -24,13 +24,13 @@ class DropPath(nn.Module):
return x
@register_module()
@blackbox_module
class PoolBN(nn.Module):
"""
AvgPool or MaxPool with BN. `pool_type` must be `max` or `avg`.
"""
def __init__(self, pool_type, C, kernel_size, stride, padding, affine=True):
super(PoolBN, self).__init__()
super().__init__()
if pool_type.lower() == 'max':
self.pool = nn.MaxPool2d(kernel_size, stride, padding)
elif pool_type.lower() == 'avg':
......@@ -45,13 +45,13 @@ class PoolBN(nn.Module):
out = self.bn(out)
return out
@register_module()
@blackbox_module
class StdConv(nn.Module):
"""
Standard conv: ReLU - Conv - BN
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(StdConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_out, kernel_size, stride, padding, bias=False),
......@@ -61,13 +61,13 @@ class StdConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class FacConv(nn.Module):
"""
Factorized conv: ReLU - Conv(Kx1) - Conv(1xK) - BN
"""
def __init__(self, C_in, C_out, kernel_length, stride, padding, affine=True):
super(FacConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_in, (kernel_length, 1), stride, padding, bias=False),
......@@ -78,7 +78,7 @@ class FacConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class DilConv(nn.Module):
"""
(Dilated) depthwise separable conv.
......@@ -86,7 +86,7 @@ class DilConv(nn.Module):
If dilation == 2, 3x3 conv => 5x5 receptive field, 5x5 conv => 9x9 receptive field.
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
super(DilConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
nn.ReLU(),
nn.Conv2d(C_in, C_in, kernel_size, stride, padding, dilation=dilation, groups=C_in,
......@@ -98,14 +98,14 @@ class DilConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class SepConv(nn.Module):
"""
Depthwise separable conv.
DilConv(dilation=1) * 2.
"""
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(SepConv, self).__init__()
super().__init__()
self.net = nn.Sequential(
DilConv(C_in, C_in, kernel_size, stride, padding, dilation=1, affine=affine),
DilConv(C_in, C_out, kernel_size, 1, padding, dilation=1, affine=affine)
......@@ -114,13 +114,13 @@ class SepConv(nn.Module):
def forward(self, x):
return self.net(x)
@register_module()
@blackbox_module
class FactorizedReduce(nn.Module):
"""
Reduce feature map size by factorized pointwise (stride=2).
"""
def __init__(self, C_in, C_out, affine=True):
super(FactorizedReduce, self).__init__()
super().__init__()
self.relu = nn.ReLU()
self.conv1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.conv2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
......
......@@ -5,7 +5,7 @@ import torch
from pathlib import Path
from nni.retiarii.experiment import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategies import TPEStrategy
from nni.retiarii.strategies import TPEStrategy, RandomStrategy
from nni.retiarii.trainer import PyTorchImageClassificationTrainer
from darts_model import CNN
......@@ -13,12 +13,13 @@ from darts_model import CNN
if __name__ == '__main__':
base_model = CNN(32, 3, 16, 10, 8)
trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="CIFAR10",
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
simple_startegy = TPEStrategy()
#simple_startegy = TPEStrategy()
simple_startegy = RandomStrategy()
exp = RetiariiExperiment(base_model, trainer, [], simple_startegy)
......@@ -30,4 +31,4 @@ if __name__ == '__main__':
exp_config.training_service.use_active_gpu = True
exp_config.training_service.gpu_indices = [1, 2]
exp.run(exp_config, 8081, debug=True)
exp.run(exp_config, 8081)
import json
import numpy as np
import os
import sys
import torch
import torch.nn as nn
from pathlib import Path
from torchvision import transforms
from torchvision.datasets import CIFAR10
from nni.retiarii.experiment import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategies import TPEStrategy
from nni.retiarii.trainer.pytorch import DartsTrainer
from darts_model import CNN
class Cutout(object):
    """Cutout augmentation: zero out one random square patch of an image.

    See DeVries & Taylor, "Improved Regularization of Convolutional
    Neural Networks with Cutout" (arXiv:1708.04552).
    """

    def __init__(self, length):
        # Side length of the square patch to erase.
        self.length = length

    def __call__(self, img):
        """Mask a random square of ``img`` (C x H x W tensor) in place and return it."""
        height, width = img.size(1), img.size(2)
        mask = np.ones((height, width), np.float32)

        # Patch center, drawn in the same order as before (y first, then x)
        # so RNG consumption is unchanged.
        center_y = np.random.randint(height)
        center_x = np.random.randint(width)
        half = self.length // 2

        top = np.clip(center_y - half, 0, height)
        bottom = np.clip(center_y + half, 0, height)
        left = np.clip(center_x - half, 0, width)
        right = np.clip(center_x + half, 0, width)
        mask[top: bottom, left: right] = 0.

        patch = torch.from_numpy(mask).expand_as(img)
        img *= patch  # in-place: broadcast the single-channel mask over all channels
        return img
def get_dataset(cls, cutout_length=0):
    """Build the (train, valid) dataset pair with standard CIFAR-10 preprocessing.

    Parameters
    ----------
    cls : str
        Dataset name; only ``"cifar10"`` is supported.
    cutout_length : int
        If positive, a ``Cutout`` of this size is appended to the training
        transform chain.

    Returns
    -------
    tuple
        ``(dataset_train, dataset_valid)``.

    Raises
    ------
    NotImplementedError
        For any ``cls`` other than ``"cifar10"``.
    """
    MEAN = [0.49139968, 0.48215827, 0.44653124]
    STD = [0.24703233, 0.24348505, 0.26158768]
    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalization = [
        transforms.ToTensor(),
        transforms.Normalize(MEAN, STD),
    ]
    # Cutout is applied only to training data, after normalization.
    extra = [Cutout(cutout_length)] if cutout_length > 0 else []

    train_transform = transforms.Compose(augmentation + normalization + extra)
    valid_transform = transforms.Compose(normalization)

    if cls != "cifar10":
        raise NotImplementedError
    dataset_train = CIFAR10(root="./data/cifar10", train=True, download=True, transform=train_transform)
    dataset_valid = CIFAR10(root="./data/cifar10", train=False, download=True, transform=valid_transform)
    return dataset_train, dataset_valid
def accuracy(output, target, topk=(1,)):
    """Compute precision@k for each k in ``topk``.

    Parameters
    ----------
    output : torch.Tensor
        Scores/logits of shape ``(batch, n_classes)``.
    target : torch.Tensor
        Ground-truth labels of shape ``(batch,)``, or one-hot of shape
        ``(batch, n_classes)``.
    topk : tuple of int
        The k values to evaluate.

    Returns
    -------
    dict
        Maps ``"acc{k}"`` to the fraction of samples whose true label is
        among the top-k predictions.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()

    # one-hot case: convert to class indices
    if target.ndimension() > 1:
        target = target.max(1)[1]

    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = dict()
    for k in topk:
        # Fix: `correct` is non-contiguous (it comes from `pred.t()`), so
        # `.view(-1)` raises RuntimeError on recent PyTorch; `reshape` copies
        # when necessary and always works.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res["acc{}".format(k)] = correct_k.mul_(1.0 / batch_size).item()
    return res
if __name__ == '__main__':
    # DARTS search space: 32x32 inputs, 3 input channels, 16 initial channels,
    # 10 classes, 8 cells.
    base_model = CNN(32, 3, 16, 10, 8)
    dataset_train, dataset_valid = get_dataset("cifar10")
    criterion = nn.CrossEntropyLoss()
    # Hyper-parameters follow the original DARTS paper (SGD + cosine annealing).
    optim = torch.optim.SGD(base_model.parameters(), 0.025, momentum=0.9, weight_decay=3.0E-4)
    # NOTE(review): lr_scheduler is never stepped in this script; presumably the
    # trainer drives it — confirm, otherwise the learning rate stays constant.
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, 50, eta_min=0.001)
    trainer = DartsTrainer(
        model=base_model,
        loss=criterion,
        metrics=lambda output, target: accuracy(output, target, topk=(1,)),
        optimizer=optim,
        num_epochs=50,
        dataset=dataset_train,
        batch_size=32,
        log_frequency=10,
        unrolled=False  # first-order approximation of the architecture gradient
    )
    # One-shot search: the DARTS trainer performs the search itself, so no
    # extra mutators or strategy are passed to the experiment.
    exp = RetiariiExperiment(base_model, trainer)
    exp.run()
from nni.retiarii import blackbox_module
import nni.retiarii.nn.pytorch as nn
import warnings
import torch
......@@ -8,8 +10,6 @@ import torch.nn.functional as F
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import register_module
# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is
# 1.0 - tensorflow.
......@@ -27,6 +27,7 @@ class _ResidualBlock(nn.Module):
def forward(self, x):
return self.net(x) + x
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
......@@ -110,7 +111,7 @@ def _get_depths(depths, alpha):
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
@register_module()
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
......@@ -127,7 +128,7 @@ class MNASNet(nn.Module):
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
skips, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
super().__init__()
assert alpha > 0.0
assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
self.alpha = alpha
......@@ -143,22 +144,22 @@ class MNASNet(nn.Module):
nn.ReLU(inplace=True),
]
count = 0
#for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
# TODO: restrict that "choose" can only be used within mutator
ph = nn.Placeholder(label=f'mutable_{count}', related_info={
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
#'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
# 'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
})
layers.append(ph)
'''if conv == "mconv":
......@@ -185,7 +186,7 @@ class MNASNet(nn.Module):
#self.for_test = 10
def forward(self, x):
#if self.for_test == 10:
# if self.for_test == 10:
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
......@@ -196,7 +197,7 @@ class MNASNet(nn.Module):
for m in self.modules():
if isinstance(m, nn.Conv2d):
torch_nn.init.kaiming_normal_(m.weight, mode="fan_out",
nonlinearity="relu")
nonlinearity="relu")
if m.bias is not None:
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
......@@ -204,16 +205,18 @@ class MNASNet(nn.Module):
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out",
nonlinearity="sigmoid")
nonlinearity="sigmoid")
torch_nn.init.zeros_(m.bias)
def test_model(model):
model(torch.randn(2, 3, 224, 224))
#====================definition of candidate op classes
# ====================definition of candidate op classes
BN_MOMENTUM = 1 - 0.9997
class RegularConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -234,6 +237,7 @@ class RegularConv(nn.Module):
out = out + x
return out
class DepthwiseConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -257,6 +261,7 @@ class DepthwiseConv(nn.Module):
out = out + x
return out
class MobileConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
......@@ -274,7 +279,7 @@ class MobileConv(nn.Module):
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding= (kernel_size - 1) // 2,
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=(kernel_size - 1) // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
......@@ -288,5 +293,6 @@ class MobileConv(nn.Module):
out = out + x
return out
# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
\ No newline at end of file
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
......@@ -19,12 +19,12 @@ if __name__ == '__main__':
_DEFAULT_NUM_LAYERS = [1, 3, 3, 3, 2, 4, 1]
base_model = MNASNet(0.5, _DEFAULT_DEPTHS, _DEFAULT_CONVOPS, _DEFAULT_KERNEL_SIZES,
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="CIFAR10",
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
dataset_kwargs={"root": "data/cifar10", "download": True},
dataloader_kwargs={"batch_size": 32},
optimizer_kwargs={"lr": 1e-3},
trainer_kwargs={"max_epochs": 1})
# new interface
applied_mutators = []
......@@ -41,4 +41,4 @@ if __name__ == '__main__':
exp_config.max_trial_number = 10
exp_config.training_service.use_active_gpu = False
exp.run(exp_config, 8081, debug=True)
exp.run(exp_config, 8081)
import random
import nni.retiarii.nn.pytorch as nn
import torch.nn.functional as F
from nni.retiarii.experiment import RetiariiExeConfig, RetiariiExperiment
from nni.retiarii.strategies import RandomStrategy
from nni.retiarii.trainer import PyTorchImageClassificationTrainer
class Net(nn.Module):
    """LeNet-style MNIST search space with one mutable fully-connected layer.

    The first FC layer is a ``LayerChoice`` between a biased and an unbiased
    ``Linear``; the convolutional stem and the classifier head are fixed.
    """

    def __init__(self, hidden_size):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # Both candidates share the same in/out shape, so the choice is
        # transparent to the rest of the forward pass.
        self.fc1 = nn.LayerChoice([
            nn.Linear(4 * 4 * 50, hidden_size),
            nn.Linear(4 * 4 * 50, hidden_size, bias=False)
        ])
        self.fc2 = nn.Linear(hidden_size, 10)

    def forward(self, x):
        # Two conv -> relu -> 2x2 max-pool stages.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)
        flat = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)
if __name__ == '__main__':
    base_model = Net(128)
    # Trainer handles data loading, optimization and evaluation for each trial;
    # MNIST is downloaded on first use and one epoch keeps the demo fast.
    trainer = PyTorchImageClassificationTrainer(base_model, dataset_cls="MNIST",
                                                dataset_kwargs={"root": "data/mnist", "download": True},
                                                dataloader_kwargs={"batch_size": 32},
                                                optimizer_kwargs={"lr": 1e-3},
                                                trainer_kwargs={"max_epochs": 1})
    # Random search over the (small) space defined by Net's LayerChoice.
    simple_startegy = RandomStrategy()  # NOTE(review): "startegy" is a typo; renaming would be a code change
    exp = RetiariiExperiment(base_model, trainer, [], simple_startegy)
    exp_config = RetiariiExeConfig('local')
    exp_config.experiment_name = 'mnist_search'
    exp_config.trial_concurrency = 2
    exp_config.max_trial_number = 10
    exp_config.training_service.use_active_gpu = False
    # Random port offset avoids clashes when several experiments share a host.
    exp.run(exp_config, 8081 + random.randint(0, 100))
"""
Reference: We use tested models from https://github.com/pytorch/pytorch/blob/master/test/jit/test_models.py.
"""
import os
import sys
import unittest
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import blackbox_module
from nni.retiarii.converter import convert_to_graph
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii.utils import get_records
class MnistNet(nn.Module):
    """Small LeNet-style classifier for 28x28 single-channel MNIST digits."""

    def __init__(self):
        super(MnistNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Pool-then-relu is elementwise-identical to relu-then-pool
        # (ReLU is monotonic, so it commutes with max pooling).
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        hidden = self.conv2_drop(self.conv2(x))
        x = F.relu(F.max_pool2d(hidden, 2))
        x = x.view(-1, 320)  # flatten: 20 channels x 4 x 4
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        return F.log_softmax(self.fc2(x), dim=1)
class TestConvert(unittest.TestCase):
@staticmethod
def _match_state_dict(current_values, expected_format):
result = {}
for k, v in expected_format.items():
for cv in current_values:
if cv.shape == v.shape:
result[k] = cv
current_values.remove(cv)
break
return result
def checkExportImport(self, model, input):
script_module = torch.jit.script(model)
model_ir = convert_to_graph(script_module, model)
model_code = model_to_pytorch_script(model_ir)
exec_vars = {}
exec(model_code + '\n\nconverted_model = _model()', exec_vars)
converted_model = exec_vars['converted_model']
converted_state_dict = self._match_state_dict(list(model.state_dict().values()),
dict(converted_model.state_dict()))
converted_model.load_state_dict(converted_state_dict)
with torch.no_grad():
expected_output = model.eval()(*input)
converted_output = converted_model.eval()(*input)
self.assertEqual(len(converted_output), len(expected_output))
for a, b in zip(converted_output, expected_output):
self.assertLess((a - b).abs().max().item(), 1E-4)
return converted_model
def setUp(self):
# FIXME
import nni.retiarii.debug_configs
nni.retiarii.debug_configs.framework = 'pytorch'
def test_dcgan_models(self):
class DCGANGenerator(nn.Module):
def __init__(self, nz, ngf, nc):
super(DCGANGenerator, self).__init__()
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 4 x 4
nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 8 x 8
nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 16 x 16
nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 32 x 32
nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 64 x 64
)
def forward(self, input):
return self.main(input)
class DCGANDiscriminator(nn.Module):
def __init__(self, nc, ndf):
super(DCGANDiscriminator, self).__init__()
self.main = nn.Sequential(
# input is (nc) x 64 x 64
nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 32 x 32
nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 16 x 16
nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 8 x 8
nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 8),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 4 x 4
nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
nn.Sigmoid()
)
def forward(self, input):
return self.main(input).view(-1, 1).squeeze(1)
bs, nz, ngf, nc, ndf = 5, 6, 9, 3, 10
input = (torch.rand(bs, nz, 1, 1),)
model = DCGANGenerator(nz, ngf, nc)
self.checkExportImport(model, input)
@unittest.skip('this test has a if condition that needs to be handle') # FIXME
def test_neural_style(self):
class TransformerNet(torch.nn.Module):
def __init__(self):
super(TransformerNet, self).__init__()
# Initial convolution layers
self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
# Residual layers
self.res1 = ResidualBlock(128)
self.res2 = ResidualBlock(128)
self.res3 = ResidualBlock(128)
self.res4 = ResidualBlock(128)
self.res5 = ResidualBlock(128)
# Upsampling Layers
self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
# Non-linearities
self.relu = torch.nn.ReLU()
def forward(self, X):
y = self.relu(self.in1(self.conv1(X)))
y = self.relu(self.in2(self.conv2(y)))
y = self.relu(self.in3(self.conv3(y)))
y = self.res1(y)
y = self.res2(y)
y = self.res3(y)
y = self.res4(y)
y = self.res5(y)
y = self.relu(self.in4(self.deconv1(y)))
y = self.relu(self.in5(self.deconv2(y)))
y = self.deconv3(y)
return y
class ConvLayer(torch.nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride):
super(ConvLayer, self).__init__()
reflection_padding = kernel_size // 2
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
def forward(self, x):
out = self.reflection_pad(x)
out = self.conv2d(out)
return out
class ResidualBlock(torch.nn.Module):
"""ResidualBlock
introduced in: https://arxiv.org/abs/1512.03385
recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
"""
def __init__(self, channels):
super(ResidualBlock, self).__init__()
self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
self.relu = torch.nn.ReLU()
def forward(self, x):
residual = x
out = self.relu(self.in1(self.conv1(x)))
out = self.in2(self.conv2(out))
out = out + residual
return out
class UpsampleConvLayer(torch.nn.Module):
"""UpsampleConvLayer
Upsamples the input and then does a convolution. This method gives better results
compared to ConvTranspose2d.
ref: http://distill.pub/2016/deconv-checkerboard/
"""
def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
super(UpsampleConvLayer, self).__init__()
self.upsample = upsample
if upsample:
self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
reflection_padding = kernel_size // 2
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
def forward(self, x):
x_in = x
if self.upsample:
x_in = self.upsample_layer(x_in)
out = self.reflection_pad(x_in)
out = self.conv2d(out)
return out
model = TransformerNet()
input = (torch.rand(5, 3, 16, 16),)
self.checkExportImport(model, input)
def test_mnist(self):
# eval() is present because dropout makes this nondeterministic
self.checkExportImport(MnistNet().eval(), (torch.rand(5, 1, 28, 28),))
def test_reinforcement_learning(self):
class Policy(nn.Module):
def __init__(self):
super(Policy, self).__init__()
self.affine1 = nn.Linear(4, 128)
self.affine2 = nn.Linear(128, 2)
def forward(self, x):
x = F.relu(self.affine1(x))
action_scores = self.affine2(x)
return F.softmax(action_scores, dim=1)
self.checkExportImport(Policy(), (torch.rand(1, 4),))
@unittest.skip('Replaced init error.') # FIXME
def test_snli(self):
class Bottle(nn.Module):
def forward(self, input):
if len(input.size()) <= 2:
return super(Bottle, self).forward(input)
size = input.size()[:2]
out = super(Bottle, self).forward(input.view(size[0] * size[1], -1))
return out.view(size[0], size[1], -1)
class Linear(Bottle, nn.Linear):
pass
class Encoder(nn.Module):
def __init__(self, config):
super(Encoder, self).__init__()
self.config = config
input_size = config.d_proj if config.projection else config.d_embed
dropout = 0 if config.n_layers == 1 else config.dp_ratio
self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden,
num_layers=config.n_layers, dropout=dropout,
bidirectional=config.birnn)
def forward(self, inputs):
batch_size = inputs.size()[1]
state_shape = self.config.n_cells, batch_size, self.config.d_hidden
h0 = c0 = inputs.new_zeros(state_shape)
outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)
class SNLIClassifier(nn.Module):
def __init__(self, config):
super(SNLIClassifier, self).__init__()
self.config = config
self.embed = nn.Embedding(config.n_embed, config.d_embed)
self.projection = Linear(config.d_embed, config.d_proj)
self.encoder = Encoder(config)
self.dropout = nn.Dropout(p=config.dp_ratio)
self.relu = nn.ReLU()
seq_in_size = 2 * config.d_hidden
if self.config.birnn:
seq_in_size *= 2
lin_config = [seq_in_size] * 2
self.out = nn.Sequential(
Linear(*lin_config),
self.relu,
self.dropout,
Linear(*lin_config),
self.relu,
self.dropout,
Linear(*lin_config),
self.relu,
self.dropout,
Linear(seq_in_size, config.d_out))
def forward(self, premise, hypothesis):
prem_embed = self.embed(premise)
hypo_embed = self.embed(hypothesis)
if self.config.fix_emb:
prem_embed = prem_embed.detach()
hypo_embed = hypo_embed.detach()
if self.config.projection:
prem_embed = self.relu(self.projection(prem_embed))
hypo_embed = self.relu(self.projection(hypo_embed))
premise = self.encoder(prem_embed)
hypothesis = self.encoder(hypo_embed)
scores = self.out(torch.cat([premise, hypothesis], 1))
return scores
class Config:
n_embed = 100
d_embed = 100
d_proj = 300
dp_ratio = 0.0 # For deterministic testing TODO: change by fixing seed in checkTrace?
d_hidden = 30
birnn = True
d_out = 300
fix_emb = True
projection = True
n_layers = 2
n_cells = 4 # 2 * n_layers because birnn = True
premise = torch.LongTensor(48, 64).random_(0, 100)
hypothesis = torch.LongTensor(24, 64).random_(0, 100)
self.checkExportImport(SNLIClassifier(Config()), (premise, hypothesis))
def test_super_resolution(self):
class Net(nn.Module):
def __init__(self, upscale_factor):
super(Net, self).__init__()
self.relu = nn.ReLU()
self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.relu(self.conv2(x))
x = self.relu(self.conv3(x))
x = self.pixel_shuffle(self.conv4(x))
return x
net = Net(upscale_factor=4)
self.checkExportImport(net, (torch.rand(5, 1, 32, 32),))
@unittest.skip('Need to support operator prim::ListUnpack') # FIXME
def test_time_sequence_prediction(self):
class Sequence(torch.jit.ScriptModule):
def __init__(self):
super(Sequence, self).__init__()
self.lstm1 = nn.LSTMCell(1, 51)
self.lstm2 = nn.LSTMCell(51, 51)
self.linear = nn.Linear(51, 1)
@torch.jit.script_method
def forward(self, input):
# TODO: add future as input with default val
# see https://github.com/pytorch/pytorch/issues/8724
outputs = torch.empty((3, 0))
h_t = torch.zeros((3, 51))
c_t = torch.zeros((3, 51))
h_t2 = torch.zeros((3, 51))
c_t2 = torch.zeros((3, 51))
output = torch.zeros([3, 51])
future = 2
# TODO: chunk call should appear as the for loop iterable
# We hard-code it to 4 for now.
a, b, c, d = input.chunk(input.size(1), dim=1)
for input_t in (a, b, c, d):
h_t, c_t = self.lstm1(input_t, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs = torch.cat((outputs, output), 1)
for _ in range(future): # if we should predict the future
h_t, c_t = self.lstm1(output, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs = torch.cat((outputs, output), 1)
return outputs
class Traced(nn.Module):
def __init__(self):
super(Traced, self).__init__()
self.seq = Sequence()
def forward(self, input):
return self.seq.forward(input)
self.checkExportImport(Traced(), (torch.rand(3, 4),))
@unittest.skip('Unsupported callmethod encode') # FIXME
def test_vae(self):
class VAE(nn.Module):
def __init__(self):
super(VAE, self).__init__()
self.fc1 = nn.Linear(784, 400)
self.fc21 = nn.Linear(400, 20)
self.fc22 = nn.Linear(400, 20)
self.fc3 = nn.Linear(20, 400)
self.fc4 = nn.Linear(400, 784)
def encode(self, x):
h1 = F.relu(self.fc1(x))
return self.fc21(h1), self.fc22(h1)
def reparameterize(self, mu, logvar):
if self.training:
std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
return eps.mul(std).add_(mu)
else:
return mu
def decode(self, z):
h3 = F.relu(self.fc3(z))
return torch.sigmoid(self.fc4(h3))
def forward(self, x):
mu, logvar = self.encode(x.view(-1, 784))
z = self.reparameterize(mu, logvar)
return self.decode(z), mu, logvar
self.checkExportImport(VAE().eval(), (torch.rand(128, 1, 28, 28),))
@unittest.skip('torchvision models are not supported yet') # FIXME
def test_torchvision_resnet18(self):
self.checkExportImport(torchvision.models.resnet18().eval(), (torch.ones(1, 3, 224, 224),))
@unittest.skip('Unsupported CallMethod _forward_impl') # FIXME
def test_resnet(self):
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(torch.jit.ScriptModule):
expansion = 1
__constants__ = ['downsample']
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
@torch.jit.script_method
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(torch.jit.ScriptModule):
    """TorchScript version of the torchvision ResNet backbone.

    Stem (7x7 conv, BN, ReLU, max-pool) followed by four residual
    stages built from ``block``, then global average pooling and a
    linear classifier head.
    """

    # Stage modules referenced from the scripted forward.
    __constants__ = ['layer1', 'layer2', 'layer3', 'layer4']

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Running channel count, updated by _make_layer as stages grow.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stages 2-4 halve the spatial resolution via stride=2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Standard ResNet weight init: He init for convs, unit/zero BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks.

        A 1x1 projection downsample is added to the first block when the
        stride or channel count changes, so the skip connection matches.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        # Only the first block in a stage strides / downsamples.
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    @torch.jit.script_method
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Flatten (N, C, 1, 1) to (N, C) for the classifier.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2])
self.checkExportImport(torchvision.models.resnet18().eval(), (torch.randn(1, 3, 224, 224),))
@unittest.skip('torchvision models are not supported yet')  # FIXME
def test_alexnet(self):
    """Round-trip AlexNet through export/import (currently skipped)."""
    model = torchvision.models.AlexNet()
    dummy_input = torch.ones(1, 3, 224, 224)
    self.checkExportImport(model, (dummy_input,))
......@@ -2,6 +2,7 @@
# Licensed under the MIT license.
import os
import psutil
import sys
import numpy as np
import torch
......@@ -128,6 +129,18 @@ def generate_random_sparsity(model):
'sparsity': sparsity})
return cfg_list
def generate_random_sparsity_v2(model):
    """Build a pruning config list covering roughly half of the Conv2d layers.

    Each Conv2d module is selected independently with probability 0.5;
    every selected layer gets a sparsity drawn uniformly from [0.5, 0.99).
    Non-Conv2d modules are ignored.
    """
    config_list = []
    for layer_name, layer in model.named_modules():
        if not isinstance(layer, nn.Conv2d):
            continue
        # Coin flip: skip this layer about half of the time.
        if np.random.uniform(0, 1.0) <= 0.5:
            continue
        config_list.append({
            'op_types': ['Conv2d'],
            'op_names': [layer_name],
            'sparsity': np.random.uniform(0.5, 0.99),
        })
    return config_list
def zero_bn_bias(model):
with torch.no_grad():
......@@ -292,52 +305,62 @@ class SpeedupTestCase(TestCase):
# Example: https://msrasrg.visualstudio.com/NNIOpenSource/_build/results?buildId=16282
def test_speedup_integration(self):
for model_name in ['resnet18', 'squeezenet1_1',
'mobilenet_v2', 'densenet121',
# skip this test on windows(7GB mem available) due to memory limit
# Note: hack trick, may be updated in the future
if 'win' in sys.platform or 'Win'in sys.platform:
print('Skip test_speedup_integration on windows due to memory limit!')
return
Gen_cfg_funcs = [generate_random_sparsity, generate_random_sparsity_v2]
for model_name in ['resnet18', 'mobilenet_v2', 'squeezenet1_1', 'densenet121' , 'densenet169',
# 'inception_v3' inception is too large and may fail the pipeline
'densenet169', 'resnet50']:
kwargs = {
'pretrained': True
}
if model_name == 'resnet50':
# testing multiple groups
'resnet50']:
for gen_cfg_func in Gen_cfg_funcs:
kwargs = {
'pretrained': False,
'groups': 4
'pretrained': True
}
if model_name == 'resnet50':
# testing multiple groups
kwargs = {
'pretrained': False,
'groups': 4
}
Model = getattr(models, model_name)
net = Model(**kwargs).to(device)
speedup_model = Model(**kwargs).to(device)
net.eval() # this line is necessary
speedup_model.eval()
# random generate the prune config for the pruner
cfgs = gen_cfg_func(net)
print("Testing {} with compression config \n {}".format(model_name, cfgs))
pruner = L1FilterPruner(net, cfgs)
pruner.compress()
pruner.export_model(MODEL_FILE, MASK_FILE)
pruner._unwrap_model()
state_dict = torch.load(MODEL_FILE)
speedup_model.load_state_dict(state_dict)
zero_bn_bias(net)
zero_bn_bias(speedup_model)
data = torch.ones(BATCH_SIZE, 3, 128, 128).to(device)
ms = ModelSpeedup(speedup_model, data, MASK_FILE)
ms.speedup_model()
speedup_model.eval()
ori_out = net(data)
speeded_out = speedup_model(data)
ori_sum = torch.sum(ori_out).item()
speeded_sum = torch.sum(speeded_out).item()
print('Sum of the output of %s (before speedup):' %
model_name, ori_sum)
print('Sum of the output of %s (after speedup):' %
model_name, speeded_sum)
assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
Model = getattr(models, model_name)
net = Model(**kwargs).to(device)
speedup_model = Model(**kwargs).to(device)
net.eval() # this line is necessary
speedup_model.eval()
# random generate the prune config for the pruner
cfgs = generate_random_sparsity(net)
pruner = L1FilterPruner(net, cfgs)
pruner.compress()
pruner.export_model(MODEL_FILE, MASK_FILE)
pruner._unwrap_model()
state_dict = torch.load(MODEL_FILE)
speedup_model.load_state_dict(state_dict)
zero_bn_bias(net)
zero_bn_bias(speedup_model)
data = torch.ones(BATCH_SIZE, 3, 128, 128).to(device)
ms = ModelSpeedup(speedup_model, data, MASK_FILE)
ms.speedup_model()
speedup_model.eval()
ori_out = net(data)
speeded_out = speedup_model(data)
ori_sum = torch.sum(ori_out).item()
speeded_sum = torch.sum(speeded_out).item()
print('Sum of the output of %s (before speedup):' %
model_name, ori_sum)
print('Sum of the output of %s (after speedup):' %
model_name, speeded_sum)
assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
def test_channel_prune(self):
orig_net = resnet18(num_classes=10).to(device)
......@@ -369,8 +392,10 @@ class SpeedupTestCase(TestCase):
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
def tearDown(self):
os.remove(MODEL_FILE)
os.remove(MASK_FILE)
if os.path.exists(MODEL_FILE):
os.remove(MODEL_FILE)
if os.path.exists(MASK_FILE):
os.remove(MASK_FILE)
if __name__ == '__main__':
......
......@@ -344,16 +344,19 @@ function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newE
let cmd: string = command;
let arg: string[] = [];
let newShell: boolean = true;
let isDetached: boolean = false;
if (process.platform === "win32") {
cmd = command.split(" ", 1)[0];
arg = command.substr(cmd.length + 1).split(" ");
newShell = false;
isDetached = true;
}
const tunerProc: ChildProcess = spawn(cmd, arg, {
stdio,
cwd: newCwd,
env: newEnv,
shell: newShell
shell: newShell,
detached: isDetached
});
return tunerProc;
}
......@@ -434,8 +437,8 @@ function withLockSync(func: Function, filePath: string, lockOpts: {[key: string]
const lockPath = path.join(path.dirname(filePath), path.basename(filePath) + '.lock.*');
const lockFileNames: string[] = glob.sync(lockPath);
const canLock: boolean = lockFileNames.map((fileName) => {
return fs.existsSync(fileName) && Date.now() - fs.statSync(fileName).mtimeMs > lockOpts.stale;
}).filter(isExpired=>isExpired === false).length === 0;
return fs.existsSync(fileName) && Date.now() - fs.statSync(fileName).mtimeMs < lockOpts.stale;
}).filter(unexpired=>unexpired === true).length === 0;
if (!canLock) {
throw new Error('File has been locked.');
}
......
......@@ -450,15 +450,17 @@ class NNIManager implements Manager {
throw new Error('Error: tuner has not been setup');
}
this.trainingService.removeTrialJobMetricListener(this.trialJobMetricListener);
this.dispatcher.sendCommand(TERMINATE);
let tunerAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) {
if (!tunerAlive) { break; }
tunerAlive = await isAlive(this.dispatcherPid);
await delay(1000);
}
await killPid(this.dispatcherPid);
if (this.dispatcherPid > 0) {
this.dispatcher.sendCommand(TERMINATE);
let tunerAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) {
if (!tunerAlive) { break; }
tunerAlive = await isAlive(this.dispatcherPid);
await delay(1000);
}
await killPid(this.dispatcherPid);
}
const trialJobList: TrialJobDetail[] = await this.trainingService.listTrialJobs();
// DON'T try to make it in parallel, the training service may not handle it well.
......@@ -480,7 +482,6 @@ class NNIManager implements Manager {
}
await this.storeExperimentProfile();
this.setStatus('STOPPED');
this.experimentManager.setExperimentInfo(this.experimentProfile.id, 'port', undefined);
}
private async periodicallyUpdateExecDuration(): Promise<void> {
......
......@@ -37,7 +37,7 @@ function initStartupInfo(
}
async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
const routerPlatformMode = ['remote', 'pai', 'aml', 'heterogeneous'];
const routerPlatformMode = ['remote', 'pai', 'aml', 'hybrid'];
if (routerPlatformMode.includes(platformMode)) {
Container.bind(TrainingService)
.to(RouterTrainingService)
......@@ -97,7 +97,7 @@ async function initContainer(foreground: boolean, platformMode: string, logFileN
function usage(): void {
console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/heterogeneous> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/hybrid> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
}
const strPort: string = parseArg(['--port', '-p']);
......@@ -117,7 +117,7 @@ const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : fals
const port: number = parseInt(strPort, 10);
const mode: string = parseArg(['--mode', '-m']);
if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'].includes(mode)) {
if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'].includes(mode)) {
console.log(`FATAL: unknown mode: ${mode}`);
usage();
process.exit(1);
......
......@@ -21,7 +21,7 @@
"lockfile": "^1.0.4",
"python-shell": "^2.0.1",
"rx": "^4.1.0",
"sqlite3": "^5.0.0",
"sqlite3": "5.0.0",
"ssh2": "^0.6.1",
"stream-buffers": "^3.0.2",
"tail-stream": "^0.3.4",
......@@ -68,17 +68,19 @@
},
"resolutions": {
"mem": "^4.0.0",
"lodash": "^4.17.13",
"lodash.merge": "^4.6.2",
"lodash": ">=4.17.13",
"lodash.merge": ">=4.6.2",
"node.extend": "^1.1.7",
"hoek": "^4.2.1",
"js-yaml": "^3.13.1",
"acorn": ">=7.1.1",
"node-forge": "^0.10.0",
"node-forge": ">=0.10.0",
"dot-prop": "^4.2.1",
"npm": ">=6.14.8",
"yargs": ">=16.0.3",
"yargs-parser": ">=20.2.0"
"yargs-parser": ">=20.2.0",
"y18n": ">=5.0.5",
"acorn": ">=8.0.4",
"serialize-javascript": ">=5.0.1"
},
"engines": {
"node": ">=10.0.0"
......
......@@ -183,7 +183,7 @@ export namespace ValidationSchemas {
maxTrialNumPerGpu: joi.number(),
useActiveGpu: joi.boolean()
}),
heterogeneous_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
hybrid_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
trainingServicePlatforms: joi.array(),
}),
nni_manager_ip: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
......
......@@ -11,7 +11,7 @@ export enum TrialConfigMetadataKey {
LOCAL_CONFIG = 'local_config',
TRIAL_CONFIG = 'trial_config',
REMOTE_CONFIG = 'remote_config',
HETEROGENEOUS_CONFIG = 'heterogeneous_config',
HYBRID_CONFIG = 'hybrid_config',
EXPERIMENT_ID = 'experimentId',
MULTI_PHASE = 'multiPhase',
RANDOM_SCHEDULER = 'random_scheduler',
......@@ -24,7 +24,7 @@ export enum TrialConfigMetadataKey {
AML_CLUSTER_CONFIG = 'aml_config',
VERSION_CHECK = 'version_check',
LOG_COLLECTION = 'log_collection',
// Used to set platform for heterogeneous in reuse mode,
// Used to set platform for hybrid in reuse mode,
// temproarily change and will refactor config schema in the future
PLATFORM_LIST = 'platform_list'
}
......@@ -271,9 +271,9 @@ ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \
environment.command, { encoding: 'utf8' });
// Copy files in codeDir to remote working directory
await executor.copyDirectoryToRemote(environmentLocalTempFolder, environment.runnerWorkingFolder);
// Execute command in remote machine
// Execute command in remote machine, set isInteractive=true to run script in conda environment
executor.executeScript(executor.joinPath(environment.runnerWorkingFolder,
executor.getScriptName("run")), true, false);
executor.getScriptName("run")), true, true);
if (environment.rmMachineMeta === undefined) {
throw new Error(`${environment.id} rmMachineMeta not initialized!`);
}
......
......@@ -95,8 +95,8 @@ class RouterTrainingService implements TrainingService {
public async setClusterMetadata(key: string, value: string): Promise<void> {
if (this.internalTrainingService === undefined) {
// Need to refactor configuration, remove heterogeneous_config field in the future
if (key === TrialConfigMetadataKey.HETEROGENEOUS_CONFIG){
// Need to refactor configuration, remove hybrid_config field in the future
if (key === TrialConfigMetadataKey.HYBRID_CONFIG){
this.internalTrainingService = component.get(TrialDispatcher);
const heterogenousConfig: HeterogenousConfig = <HeterogenousConfig>JSON.parse(value);
if (this.internalTrainingService === undefined) {
......
......@@ -529,9 +529,10 @@ acorn-jsx@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.1.0.tgz#294adb71b57398b0680015f0a38c563ee1db5384"
acorn@>=7.1.1, acorn@^7.1.0:
version "7.1.1"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.1.1.tgz#e35668de0b402f359de515c5482a1ab9f89a69bf"
acorn@>=8.0.4, acorn@^7.1.0:
version "8.0.4"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.0.4.tgz#7a3ae4191466a6984eee0fe3407a4f3aa9db8354"
integrity sha512-XNP0PqF1XD19ZlLKvB7cMmnZswW4C/03pRHgirB30uSJTaS3A3V1/P4sS3HPvFmjoriPCJQs+JDSbm4bL1TxGQ==
agent-base@4, agent-base@^4.3.0:
version "4.3.0"
......@@ -3122,9 +3123,10 @@ lodash.intersection@^4.4.0:
version "4.4.0"
resolved "https://registry.yarnpkg.com/lodash.intersection/-/lodash.intersection-4.4.0.tgz#0a11ba631d0e95c23c7f2f4cbb9a692ed178e705"
lodash.merge@^4.6.1, lodash.merge@^4.6.2:
lodash.merge@>=4.6.2, lodash.merge@^4.6.1:
version "4.6.2"
resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==
lodash.omit@^4.5.0:
version "4.5.0"
......@@ -3154,10 +3156,10 @@ lodash.without@~4.4.0:
version "4.4.0"
resolved "https://registry.yarnpkg.com/lodash.without/-/lodash.without-4.4.0.tgz#3cd4574a00b67bae373a94b748772640507b7aac"
lodash@^4.17.11, lodash@^4.17.13, lodash@^4.17.14, lodash@^4.17.15:
version "4.17.19"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.19.tgz#e48ddedbe30b3321783c5b4301fbd353bc1e4a4b"
integrity sha512-JNvd8XER9GQX0v2qJgsaN/mzFCNA5BRe/j8JN9d+tWyGLSodKQHKFicdwNYzWwI3wjRnaKPsGj1XkBjx/F96DQ==
lodash@>=4.17.13, lodash@^4.17.11, lodash@^4.17.13, lodash@^4.17.14, lodash@^4.17.15:
version "4.17.20"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.20.tgz#b44a9b6297bcb698f1c51a3545a2b3b368d59c52"
integrity sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==
log-symbols@4.0.0:
version "4.0.0"
......@@ -3476,7 +3478,7 @@ node-fetch-npm@^2.0.2:
json-parse-better-errors "^1.0.0"
safe-buffer "^5.1.1"
node-forge@^0.10.0, node-forge@^0.7.6:
node-forge@>=0.10.0, node-forge@^0.7.6:
version "0.10.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
......@@ -4836,10 +4838,10 @@ send@0.16.2:
range-parser "~1.2.0"
statuses "~1.4.0"
serialize-javascript@4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-4.0.0.tgz#b525e1238489a5ecfc42afacc3fe99e666f4b1aa"
integrity sha512-GaNA54380uFefWghODBWEGisLZFj00nS5ACs6yHa9nLqlLpVLO8ChDGeKRjZnV4Nh4n0Qi7nhYZD/9fCPzEqkw==
serialize-javascript@4.0.0, serialize-javascript@>=5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
integrity sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==
dependencies:
randombytes "^2.1.0"
......@@ -5006,7 +5008,7 @@ sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
sqlite3@^5.0.0:
sqlite3@5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/sqlite3/-/sqlite3-5.0.0.tgz#1bfef2151c6bc48a3ab1a6c126088bb8dd233566"
integrity sha512-rjvqHFUaSGnzxDy2AHCwhHy6Zp6MNJzCPGYju4kD8yi6bze4d1/zMTg6C7JI49b7/EM7jKMTvyfN/4ylBKdwfw==
......@@ -5725,15 +5727,10 @@ xtend@~4.0.1:
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==
y18n@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.0.tgz#95ef94f85ecc81d007c264e190a120f0a3c8566b"
integrity sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==
y18n@^5.0.2:
version "5.0.4"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.4.tgz#0ab2db89dd5873b5ec4682d8e703e833373ea897"
integrity sha512-deLOfD+RvFgrpAmSZgfGdWYE+OKyHcVHaRQ7NphG/63scpRvTHHeQMAxGGvaLVGJ+HYVcCXlzcTK0ZehFf+eHQ==
y18n@>=5.0.5, y18n@^4.0.0, y18n@^5.0.2:
version "5.0.5"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.5.tgz#8769ec08d03b1ea2df2500acef561743bbb9ab18"
integrity sha512-hsRUr4FFrvhhRH12wOdfs38Gy7k2FFzB9qgN9v3aLykRq0dRcdcpz5C9FxdS2NuhOrI/628b/KSTJ3rwHysYSg==
yallist@^2.1.2:
version "2.1.2"
......
......@@ -46,7 +46,6 @@
"react-json-tree": "^0.11.2",
"react-monaco-editor": "^0.32.1",
"react-paginate": "^6.3.2",
"react-pagination": "^1.0.0",
"react-responsive": "^8.1.1",
"react-router": "^5.2.0",
"react-router-dom": "^5.2.0",
......@@ -115,6 +114,10 @@
},
"resolutions": {
"npm": ">=6.14.4",
"yargs": ">=16.0.3"
"yargs": ">=16.0.3",
"acorn": ">=8.0.4",
"node-forge": ">=0.10.0",
"y18n": ">=5.0.5",
"serialize-javascript": ">=5.0.1"
}
}
......@@ -36,7 +36,6 @@
/* nav bar: 56 + marginTop: 24 */
margin-top: 80px;
margin-bottom: 30px;
}
.bottomDiv {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment