Unverified Commit 8af73146 authored by QuanluZhang's avatar QuanluZhang Committed by GitHub

[Retiarii] pytorch code converter (#3052)

parent 002af91f
import sys
from pathlib import Path

import torch
import torch.nn as torch_nn
import torch.nn.functional as F

sys.path.append(str(Path(__file__).resolve().parents[2]))
from nni.retiarii import nn
# The paper suggests 0.9997 momentum for TensorFlow. The equivalent PyTorch
# momentum is 1.0 minus the TensorFlow value.
_BN_MOMENTUM = 1 - 0.9997
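# Concretely: TensorFlow updates running stats as new = momentum * old +
# (1 - momentum) * batch, while PyTorch BatchNorm uses new = (1 - momentum) * old +
# momentum * batch, so TF momentum 0.9997 corresponds to PyTorch momentum
# 1 - 0.9997 = 0.0003.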
_FIRST_DEPTH = 32
_MOBILENET_V2_FILTERS = [16, 24, 32, 64, 96, 160, 320]
_MOBILENET_V2_NUM_LAYERS = [1, 2, 3, 4, 3, 3, 1]
class _ResidualBlock(nn.Module):
def __init__(self, net):
super().__init__()
self.net = net
def forward(self, x):
return self.net(x) + x
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
super(_InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel_size in [3, 5]
mid_ch = in_ch * expansion_factor
self.apply_residual = skip and in_ch == out_ch and stride == 1
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum))
def forward(self, input):
if self.apply_residual:
ret = self.layers(input) + input
else:
ret = self.layers(input)
return ret
def _stack_inverted_residual(in_ch, out_ch, kernel_size, skip, stride, exp_factor, repeats, bn_momentum):
""" Creates a stack of inverted residuals. """
assert repeats >= 1
# First one has no skip, because feature map size changes.
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, skip, bn_momentum=bn_momentum)
remaining = []
for _ in range(1, repeats):
remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, skip, bn_momentum=bn_momentum))
return nn.Sequential(first, *remaining)
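# Illustrative example (hypothetical values):
#   _stack_inverted_residual(16, 24, 3, True, 2, 6, 3, _BN_MOMENTUM)
# builds _InvertedResidual(16 -> 24, stride 2, residual disabled by the
# stride/channel change) followed by two _InvertedResidual(24 -> 24, stride 1)
# blocks that do apply the residual.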
def _stack_normal_conv(in_ch, out_ch, kernel_size, skip, dconv, stride, repeats, bn_momentum):
assert repeats >= 1
stack = []
for i in range(repeats):
s = stride if i == 0 else 1
if dconv:
modules = [
nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=s, groups=in_ch, bias=False),
nn.BatchNorm2d(in_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
else:
modules = [
nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=s, bias=False),
nn.ReLU(inplace=True),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
if skip and in_ch == out_ch and s == 1:
            # use different implementations for the skip and no-skip cases to align with the PyTorch reference
stack.append(_ResidualBlock(nn.Sequential(*modules)))
else:
stack += modules
in_ch = out_ch
return stack
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
""" Asymmetric rounding to make `val` divisible by `divisor`. With default
bias, will round up, unless the number is no more than 10% greater than the
smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
assert 0.0 < round_up_bias < 1.0
new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
return new_val if new_val >= round_up_bias * val else new_val + divisor
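# Worked examples of the bias: with divisor=8, val=83 rounds to the nearest
# multiple 80, and 80 >= 0.9 * 83 = 74.7, so 80 is kept; val=11 would round
# down to 8, but 8 < 0.9 * 11 = 9.9 (more than a 10% shrink), so the function
# bumps it up to 16 instead.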
def _get_depths(depths, alpha):
""" Scales tensor depths as in reference MobileNet code, prefers rouding up
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
>>> model = MNASNet(1000, 1.0)
>>> x = torch.rand(1, 3, 224, 224)
>>> y = model(x)
>>> y.dim()
1
>>> y.nelement()
1000
"""
# Version 2 adds depth scaling in the initial stages of the network.
_version = 2
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
skips, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
assert alpha > 0.0
assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
self.alpha = alpha
self.num_classes = num_classes
depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
exp_ratios = [3, 3, 3, 6, 6, 6, 6]
strides = [1, 2, 2, 2, 1, 2, 1]
layers = [
# First layer: regular conv.
nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
count = 0
#for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
# zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
# TODO: restrict that "choose" can only be used within mutator
ph = nn.Placeholder(label=f'mutable_{count}', related_info={
'kernel_size_options': [1, 3, 5],
'n_layer_options': [1, 2, 3, 4],
'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
'__mutated__.base_mnasnet.DepthwiseConv',
'__mutated__.base_mnasnet.MobileConv'],
#'se_ratio_options': [0, 0.25],
'skip_options': ['identity', 'no'],
'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
'exp_ratio': exp_ratio,
'stride': stride,
'in_ch': depths[0] if count == 0 else None
})
layers.append(ph)
'''if conv == "mconv":
# MNASNet blocks: stacks of inverted residuals.
layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
stride, exp_ratio, repeat, _BN_MOMENTUM))
else:
# Normal conv and depth-separated conv
layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
stride, repeat, _BN_MOMENTUM)'''
            count += 1
            if count >= 2:
                # only the first two stages are made mutable in this test setup
                break
layers += [
# Final mapping to classifier input.
nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
nn.Linear(1280, num_classes))
self._initialize_weights()
#self.for_test = 10
def forward(self, x):
#if self.for_test == 10:
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
x = F.relu(x)
return self.classifier(x)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
torch_nn.init.kaiming_normal_(m.weight, mode="fan_out",
nonlinearity="relu")
if m.bias is not None:
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
torch_nn.init.ones_(m.weight)
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out",
nonlinearity="sigmoid")
torch_nn.init.zeros_(m.bias)
def test_model(model):
model(torch.randn(2, 3, 224, 224))
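# Hypothetical usage (arguments mirror the _DEFAULT_* lists in the experiment
# entry script; note the base model still contains nn.Placeholder nodes, so a
# forward pass is only meaningful after the mutators have materialized them):
#   test_model(mutated_model)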
#====================definition of candidate op classes
BN_MOMENTUM = 1 - 0.9997
class RegularConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=stride, bias=False)
self.relu = nn.ReLU(inplace=True)
self.bn = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)
def forward(self, x):
out = self.bn(self.relu(self.conv(x)))
if self.skip == 'identity':
out = out + x
return out
class DepthwiseConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
self.conv1 = nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=stride, groups=in_ch, bias=False)
self.bn1 = nn.BatchNorm2d(in_ch, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)
def forward(self, x):
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
if self.skip == 'identity':
out = out + x
return out
class MobileConv(nn.Module):
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
super().__init__()
self.kernel_size = kernel_size
self.in_ch = in_ch
self.out_ch = out_ch
self.skip = skip
self.exp_ratio = exp_ratio
self.stride = stride
mid_ch = in_ch * exp_ratio
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Depthwise
            nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=(kernel_size - 1) // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM))
def forward(self, x):
out = self.layers(x)
if self.skip == 'identity':
out = out + x
return out
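# The three candidate ops above share one constructor signature, which is what
# lets a mutator swap them freely. A minimal sketch (values hypothetical):
#   op = MobileConv(kernel_size=3, in_ch=16, out_ch=16, skip='identity',
#                   exp_ratio=3, stride=1)
#   y = op(torch.randn(1, 16, 32, 32))  # residual applies: shape is preserved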
#====================Training approach
'''
import sdk
from sdk.mutators.builtin_mutators import ModuleMutator
import datasets
class ModelTrain(sdk.Trainer):
def __init__(self, device='cuda'):
super(ModelTrain, self).__init__()
self.device = torch.device(device)
self.data_provider = datasets.ImagenetDataProvider(save_path="/data/v-yugzh/imagenet",
train_batch_size=32,
test_batch_size=32,
valid_size=None,
n_worker=4,
resize_scale=0.08,
distort_color='normal')
def train_dataloader(self):
return self.data_provider.train
def val_dataloader(self):
return self.data_provider.valid
'''
#====================Experiment config
# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
import sys
from pathlib import Path

import torch
import torch.nn as torch_nn
import torch.nn.functional as F

sys.path.append(str(Path(__file__).resolve().parents[2]))
from nni.retiarii import nn
# The paper suggests 0.9997 momentum for TensorFlow. The equivalent PyTorch
# momentum is 1.0 minus the TensorFlow value.
_BN_MOMENTUM = 1 - 0.9997
_FIRST_DEPTH = 32
_MOBILENET_V2_FILTERS = [16, 24, 32, 64, 96, 160, 320]
_MOBILENET_V2_NUM_LAYERS = [1, 2, 3, 4, 3, 3, 1]
class _ResidualBlock(nn.Module):
def __init__(self, net):
super().__init__()
self.net = net
def forward(self, x):
return self.net(x) + x
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
super(_InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel_size in [3, 5]
mid_ch = in_ch * expansion_factor
self.apply_residual = skip and in_ch == out_ch and stride == 1
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
stride=stride, groups=mid_ch, bias=False),
nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum))
'''self.cells = nn.ModuleList()
for i in range(5):
self.cells.append(nn.Conv2d(2,2,2))'''
#self.x = 10
def forward(self, input):
'''out = input
x = 0
for i, each in enumerate(self.cells):
out = each(out)
if i == 1:
out = F.relu(out)
x += i
y = out
for i in range(3):
y = y + out'''
'''out = self.cells[0](out)
out = self.cells[1](out)
out = self.cells[2](out)
out = self.cells[3](out)
out = self.cells[4](out)'''
if self.apply_residual:
ret = self.layers(input) + input
else:
ret = self.layers(input)
return ret
def _stack_inverted_residual(in_ch, out_ch, kernel_size, skip, stride, exp_factor, repeats, bn_momentum):
""" Creates a stack of inverted residuals. """
assert repeats >= 1
# First one has no skip, because feature map size changes.
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, skip, bn_momentum=bn_momentum)
remaining = []
for _ in range(1, repeats):
remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, skip, bn_momentum=bn_momentum))
return nn.Sequential(first, *remaining)
def _stack_normal_conv(in_ch, out_ch, kernel_size, skip, dconv, stride, repeats, bn_momentum):
assert repeats >= 1
stack = []
for i in range(repeats):
s = stride if i == 0 else 1
if dconv:
modules = [
nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=s, groups=in_ch, bias=False),
nn.BatchNorm2d(in_ch, momentum=bn_momentum),
nn.ReLU(inplace=True),
nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
else:
modules = [
nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=s, bias=False),
nn.ReLU(inplace=True),
nn.BatchNorm2d(out_ch, momentum=bn_momentum)
]
if skip and in_ch == out_ch and s == 1:
            # use different implementations for the skip and no-skip cases to align with the PyTorch reference
stack.append(_ResidualBlock(nn.Sequential(*modules)))
else:
stack += modules
in_ch = out_ch
return stack
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
""" Asymmetric rounding to make `val` divisible by `divisor`. With default
bias, will round up, unless the number is no more than 10% greater than the
smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
assert 0.0 < round_up_bias < 1.0
new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
return new_val if new_val >= round_up_bias * val else new_val + divisor
def _get_depths(depths, alpha):
""" Scales tensor depths as in reference MobileNet code, prefers rouding up
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
>>> model = MNASNet(1000, 1.0)
>>> x = torch.rand(1, 3, 224, 224)
>>> y = model(x)
>>> y.dim()
1
>>> y.nelement()
1000
"""
# Version 2 adds depth scaling in the initial stages of the network.
_version = 2
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
skips, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
assert alpha > 0.0
assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
self.alpha = alpha
self.num_classes = num_classes
depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
exp_ratios = [3, 3, 3, 6, 6, 6, 6]
strides = [1, 2, 2, 2, 1, 2, 1]
layers = [
# First layer: regular conv.
nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
count = 0
for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
if conv == "mconv":
# MNASNet blocks: stacks of inverted residuals.
layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
stride, exp_ratio, repeat, _BN_MOMENTUM))
else:
# Normal conv and depth-separated conv
layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
stride, repeat, _BN_MOMENTUM)
count += 1
if count >= 2:
break
layers += [
# Final mapping to classifier input.
nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
nn.Linear(1280, num_classes))
self._initialize_weights()
#self.for_test = 10
def forward(self, x):
#if self.for_test == 10:
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
x = F.relu(x)
return self.classifier(x)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
torch_nn.init.kaiming_normal_(m.weight, mode="fan_out",
nonlinearity="relu")
if m.bias is not None:
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
torch_nn.init.ones_(m.weight)
torch_nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out",
nonlinearity="sigmoid")
torch_nn.init.zeros_(m.bias)
def test_model(model):
model(torch.randn(2, 3, 224, 224))
#====================Training approach
'''
import sdk
from sdk.mutators.builtin_mutators import ModuleMutator
import datasets
class ModelTrain(sdk.Trainer):
def __init__(self, device='cuda'):
super(ModelTrain, self).__init__()
self.device = torch.device(device)
self.data_provider = datasets.ImagenetDataProvider(save_path="/data/v-yugzh/imagenet",
train_batch_size=32,
test_batch_size=32,
valid_size=None,
n_worker=4,
resize_scale=0.08,
distort_color='normal')
def train_dataloader(self):
return self.data_provider.train
def val_dataloader(self):
return self.data_provider.valid
'''
#====================Experiment config
# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
import logging
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
from nni.retiarii import Mutator
from base_mnasnet import RegularConv, DepthwiseConv, MobileConv
_logger = logging.getLogger(__name__)
class BlockMutator(Mutator):
    def __init__(self, target: str):
        super().__init__()
        self.target = target
def mutate(self, model):
nodes = model.get_nodes_by_label(self.target)
assert len(nodes) == 1
node = nodes[0]
graph = node.graph
related_info = node.operation.parameters
kernel_size = self.choice(related_info['kernel_size_options'])
op_type = self.choice(related_info['op_type_options'])
#self.choice(related_info['se_ratio_options'])
skip = self.choice(related_info['skip_options'])
n_filter = self.choice(related_info['n_filter_options'])
        if related_info['in_ch'] is not None:
            in_ch = related_info['in_ch']
        else:
            # infer input channels from the single predecessor's output channels
            assert len(node.predecessors) == 1
            the_node = node.predecessors[0]
            _logger.info('inferring in_ch from predecessor: %s', the_node.operation.parameters)
            in_ch = the_node.operation.parameters['out_ch']
# update the placeholder to be a new operation
node.update_operation(op_type, {
'kernel_size': kernel_size,
'in_ch': in_ch,
'out_ch': n_filter,
'skip': 'no',
'exp_ratio': related_info['exp_ratio'],
'stride': related_info['stride']
})
# insert new nodes after the placeholder
n_layer = self.choice(related_info['n_layer_options'])
for i in range(1, n_layer):
node = graph.insert_node_on_edge(node.outgoing_edges[0],
'{}_{}'.format(self.target, i),
op_type,
{'kernel_size': kernel_size,
'in_ch': n_filter,
'out_ch': n_filter,
'skip': skip,
'exp_ratio': related_info['exp_ratio'],
'stride': 1})
# fix possible shape mismatch
# TODO: use formal method function to update parameters
if len(node.successors) == 1 and 'in_channels' in node.successors[0].operation.parameters:
node.successors[0].operation.parameters['in_channels'] = n_filter
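# Minimal usage sketch (RandomSampler as defined in simple_strategy.py; the
# model object comes from get_base_model_ir()):
#   mutator = BlockMutator('mutable_0')
#   mutator.bind_sampler(RandomSampler())
#   model = mutator.apply(model)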
import logging
import random

from nni.retiarii import submit_models, wait_models
from nni.retiarii import get_base_model_ir, get_specified_mutators, get_trainer
from nni.retiarii import Sampler
_logger = logging.getLogger(__name__)
class RandomSampler(Sampler):
def choice(self, candidates, mutator, model, index):
return random.choice(candidates)
def simple_strategy():
    try:
        _logger.info('strategy starting...')
while True:
model = get_base_model_ir()
_logger.info('apply mutators...')
applied_mutators = get_specified_mutators()
            _logger.info('mutators: %s', applied_mutators)
random_sampler = RandomSampler()
for mutator in applied_mutators:
_logger.info('mutate model...')
mutator.bind_sampler(random_sampler)
model = mutator.apply(model)
# get and apply training approach
_logger.info('apply training approach...')
trainer = get_trainer()
model.apply_trainer(trainer['modulename'], trainer['args'])
# run models
submit_models(model)
wait_models(model)
            _logger.info('Strategy says: %s', model.metric)
    except Exception:
        _logger.exception('strategy failed')
if __name__ == '__main__':
    simple_strategy()
import os
import sys
import torch
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))
from nni.retiarii.converter.graph_gen import convert_to_graph
from nni.retiarii.converter.visualize import visualize_model
from nni.retiarii import nn
from nni.retiarii.codegen.pytorch import model_to_pytorch_script
from base_mnasnet import MNASNet
from nni.experiment import Experiment
if __name__ == '__main__':
_DEFAULT_DEPTHS = [16, 24, 40, 80, 96, 192, 320]
_DEFAULT_CONVOPS = ["dconv", "mconv", "mconv", "mconv", "mconv", "mconv", "mconv"]
_DEFAULT_SKIPS = [False, True, True, True, True, True, True]
_DEFAULT_KERNEL_SIZES = [3, 3, 5, 5, 3, 5, 3]
_DEFAULT_NUM_LAYERS = [1, 3, 3, 3, 2, 4, 1]
nn.enable_record_args()
base_model = MNASNet(0.5, _DEFAULT_DEPTHS, _DEFAULT_CONVOPS, _DEFAULT_KERNEL_SIZES,
_DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)
recorded_module_args = nn.get_records()
nn.disable_record_args()
print(recorded_module_args)
script_module = torch.jit.script(base_model)
model = convert_to_graph(script_module, base_model, recorded_module_args)
#code_script = model_to_pytorch_script(model)
#print(code_script)
print("Model: ", model)
graph_ir = model._dump()
print(graph_ir)
#visualize_model(graph_ir)
# TODO: new interface
#exp = Experiment()
#exp.start_retiarii_experiment(base_model, training_approach,
# applied_mutators, strategy,
# exp_config)
exp_config = {'authorName': 'nni',
'experimentName': 'naive',
'trialConcurrency': 3,
'maxExecDuration': '1h',
'maxTrialNum': 10,
'trainingServicePlatform': 'local'
}
applied_mutators = [{'filepath': os.path.join(os.getcwd(), 'mutator.py'), 'classname': 'BlockMutator', 'args': {'target': 'mutable_0'}},
{'filepath': os.path.join(os.getcwd(), 'mutator.py'), 'classname': 'BlockMutator', 'args': {'target': 'mutable_1'}}]
training_approach = {'modulename': 'nni.retiarii.trainer.PyTorchImageClassificationTrainer', 'args': {
"dataset_cls": "CIFAR10",
"dataset_kwargs": {
"root": "data/cifar10",
"download": True
},
"dataloader_kwargs": {
"batch_size": 32
},
"optimizer_kwargs": {
"lr": 1e-3
},
"trainer_kwargs": {
"max_epochs": 1
}
}}
    strategy = {'filename': 'simple_strategy', 'funcname': 'simple_strategy', 'args': {}}
exp = Experiment()
exp.tmp_start_retiarii(graph_ir, training_approach,
applied_mutators, strategy,
exp_config)
@@ -4,11 +4,11 @@
     "outputs": ["metric"],
     "nodes": {
-        "stem": {"type": "_cell", "cell": "stem"},
-        "flatten": {"type": "Flatten"},
-        "fc1": {"type": "Dense", "parameters": {"out_features": 256, "in_features": 1024}},
-        "fc2": {"type": "Dense", "parameters": {"out_features": 10, "in_features": 256}},
-        "softmax": {"type": "Softmax"}
+        "stem": {"operation": {"type": "_cell", "cell_name": "stem"}},
+        "flatten": {"operation": {"type": "Flatten"}},
+        "fc1": {"operation": {"type": "Dense", "parameters": {"out_features": 256, "in_features": 1024}}},
+        "fc2": {"operation": {"type": "Dense", "parameters": {"out_features": 10, "in_features": 256}}},
+        "softmax": {"operation": {"type": "Softmax"}}
     },
     "edges": [
@@ -23,10 +23,10 @@
     "stem": {
         "nodes": {
-            "conv1": {"type": "Conv2d", "parameters": {"out_channels": 32, "in_channels": 1, "kernel_size": 5}},
-            "pool1": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}},
-            "conv2": {"type": "Conv2d", "parameters": {"out_channels": 64, "in_channels": 32, "kernel_size": 5}},
-            "pool2": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}}
+            "conv1": {"operation": {"type": "Conv2d", "parameters": {"out_channels": 32, "in_channels": 1, "kernel_size": 5}}},
+            "pool1": {"operation": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}}},
+            "conv2": {"operation": {"type": "Conv2d", "parameters": {"out_channels": 64, "in_channels": 32, "kernel_size": 5}}},
+            "pool2": {"operation": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}}}
         },
         "edges": [
@@ -13,6 +13,6 @@ advisor:
   classArgs:
     strategy: debug_strategy.single_model_startegy
 trial:
-  command: python -m nni.retiarii.trial_entry
+  command: python3 -m nni.retiarii.trial_entry
   codeDir: ../..
   gpuNum: 0