Commit 1011377c authored by qianyj

the source code of NNI for DCU

parent abc22158
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import cv2
import os
import numpy as np
from torch.utils import data
class PFLDDatasets(data.Dataset):
""" Dataset to manage the data loading, augmentation and generation. """
    def __init__(self, file_list, transforms=None, data_root="", img_size=112):
        """
        Parameters
        ----------
        file_list : str
            path of the annotation file; each line holds an image path
            followed by the landmark and pose-angle labels
        transforms : callable
            function for data augmentation
        data_root : str
            the root path of the dataset
        img_size : int
            the size of image height or width
        """
self.line = None
self.path = None
self.img_size = img_size
self.land = None
self.angle = None
self.data_root = data_root
self.transforms = transforms
with open(file_list, "r") as f:
self.lines = f.readlines()
def __getitem__(self, index):
""" Get the data sample and labels with the index. """
self.line = self.lines[index].strip().split()
# load image
if self.data_root:
self.img = cv2.imread(os.path.join(self.data_root, self.line[0]))
else:
self.img = cv2.imread(self.line[0])
# resize
self.img = cv2.resize(self.img, (self.img_size, self.img_size))
        # ground-truth labels: 106 (x, y) landmark coords, then pose angles
        self.land = np.asarray(self.line[1: (106 * 2 + 1)], dtype=np.float32)
        self.angle = np.asarray(self.line[(106 * 2 + 1):], dtype=np.float32)
# augmentation
if self.transforms:
self.img = self.transforms(self.img)
return self.img, self.land, self.angle
def __len__(self):
""" Get the size of dataset. """
return len(self.lines)
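# ---------------------------------------------------------------------------
# Usage sketch for PFLDDatasets above (illustrative, not part of the commit).
# It assumes an annotation file whose lines are "<image path> <212 landmark
# values> <pose angles>", matching the parsing in __getitem__, and that
# torchvision is available for the augmentation callable; paths are made up.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([transforms.ToTensor()])  # HWC uint8 -> CHW float
dataset = PFLDDatasets(
    "data/train_list.txt", transforms=transform, data_root="data", img_size=112
)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
imgs, landmarks, angles = next(iter(loader))
# imgs: [32, 3, 112, 112]; landmarks: [32, 212];
# angles: [32, 3] in the usual 3-Euler-angle PFLD setup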
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import argparse
import onnx
import onnxsim
import os
import torch
from lib.builder import search_space
from lib.ops import PRIMITIVES
from nni.algorithms.nas.pytorch.fbnet import (
LookUpTable,
NASConfig,
model_init,
)
parser = argparse.ArgumentParser(description="Export the ONNX model")
parser.add_argument("--net", default="subnet", type=str)
parser.add_argument("--supernet", default="", type=str, metavar="PATH")
parser.add_argument("--resume", default="", type=str, metavar="PATH")
parser.add_argument("--num_points", default=106, type=int)
parser.add_argument("--img_size", default=112, type=int)
parser.add_argument("--onnx", default="./output/pfld.onnx", type=str)
parser.add_argument("--onnx_sim", default="./output/subnet.onnx", type=str)
args = parser.parse_args()
os.makedirs("./output", exist_ok=True)
if args.net == "subnet":
from lib.subnet import PFLDInference
else:
raise ValueError("Network is not implemented")
check = torch.load(args.supernet, map_location=torch.device("cpu"))
sampled_arch = check["arch_sample"]
nas_config = NASConfig(search_space=search_space)
lookup_table = LookUpTable(config=nas_config, primitives=PRIMITIVES)
pfld_backbone = PFLDInference(lookup_table, sampled_arch, args.num_points)
pfld_backbone.eval()
check_sub = torch.load(args.resume, map_location=torch.device("cpu"))
param_dict = check_sub["pfld_backbone"]
model_init(pfld_backbone, param_dict)
print("Convert PyTorch model to ONNX.")
dummy_input = torch.randn(1, 3, args.img_size, args.img_size)
input_names = ["input"]
output_names = ["output"]
torch.onnx.export(
pfld_backbone,
dummy_input,
args.onnx,
verbose=True,
input_names=input_names,
output_names=output_names,
)
print("Check ONNX model.")
model = onnx.load(args.onnx)
print("Simplifying the ONNX model.")
model_opt, check = onnxsim.simplify(args.onnx)
assert check, "Simplified ONNX model could not be validated"
onnx.save(model_opt, args.onnx_sim)
print("Onnx model simplify Ok!")
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
search_space = {
# multi-stage definition for candidate layers
# here two stages are defined for PFLD searching
"stages": {
"stage_0": {
"ops": [
"mb_k3_res",
"mb_k3_e2_res",
"mb_k3_res_d3",
"mb_k5_res",
"mb_k5_e2_res",
"sep_k3",
"sep_k5",
"gh_k3",
"gh_k5",
],
"layer_num": 2,
},
"stage_1": {
"ops": [
"mb_k3_e2_res",
"mb_k3_e4_res",
"mb_k3_e2_res_se",
"mb_k3_res_d3",
"mb_k5_res",
"mb_k5_e2_res",
"mb_k5_res_se",
"mb_k5_e2_res_se",
"gh_k5",
],
"layer_num": 3,
},
},
    # per-layer information required by NAS
    # each input_shape entry is (input_channels, height, width)
"input_shape": [
(32, 14, 14),
(32, 14, 14),
(32, 14, 14),
(64, 7, 7),
(64, 7, 7),
],
# output channels for each layer
"channel_size": [32, 32, 64, 64, 64],
# stride for each layer
"strides": [1, 1, 2, 1, 1],
# height of feature map for each layer
"fm_size": [14, 14, 7, 7, 7],
}
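# ---------------------------------------------------------------------------
# Scale of the space defined above: each layer independently picks one op, so
# the number of candidate subnets is the product of |ops| ** layer_num over
# the stages (a small sketch, not part of the original file):
space_size = 1
for stage in search_space["stages"].values():
    space_size *= len(stage["ops"]) ** stage["layer_num"]
print(space_size)  # 9 ** 2 * 9 ** 3 = 59049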
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
# Basic primitives as the network path
PRIMITIVES = {
"skip": lambda c_in, c_out, stride, **kwargs: Identity(
c_in, c_out, stride, **kwargs
),
"conv1x1": lambda c_in, c_out, stride, **kwargs: Conv1x1(
c_in, c_out, stride, **kwargs
),
"depth_conv": lambda c_in, c_out, stride, **kwargs: DepthConv(
c_in, c_out, stride, **kwargs
),
"sep_k3": lambda c_in, c_out, stride, **kwargs: SeparableConv(
c_in, c_out, stride, **kwargs
),
"sep_k5": lambda c_in, c_out, stride, **kwargs: SeparableConv(
c_in, c_out, stride, kernel=5, **kwargs
),
"gh_k3": lambda c_in, c_out, stride, **kwargs: GhostModule(
c_in, c_out, stride, **kwargs
),
"gh_k5": lambda c_in, c_out, stride, **kwargs: GhostModule(
c_in, c_out, stride, kernel=5, **kwargs
),
"mb_k3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=1, **kwargs
),
"mb_k3_e2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=2, **kwargs
),
"mb_k3_e4": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=4, **kwargs
),
"mb_k3_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=1, res=True, **kwargs
),
"mb_k3_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=2, res=True, **kwargs
),
"mb_k3_e4_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=4, res=True, **kwargs
),
"mb_k3_d2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=False,
dilation=2,
**kwargs,
),
"mb_k3_d3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=False,
dilation=3,
**kwargs,
),
"mb_k3_res_d2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=2,
**kwargs,
),
"mb_k3_res_d3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=3,
**kwargs,
),
"mb_k3_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=1,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k3_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k3_e4_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=4,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k5": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=1, **kwargs
),
"mb_k5_e2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=2, **kwargs
),
"mb_k5_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=1, res=True, **kwargs
),
"mb_k5_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=2, res=True, **kwargs
),
"mb_k5_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=5,
expand=1,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k5_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=5,
expand=2,
res=True,
dilation=1,
se=True,
**kwargs,
),
}
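# Each PRIMITIVES entry is a factory taking (c_in, c_out, stride) plus keyword
# options such as fm_size. A quick shape check on one candidate (a sketch,
# not part of the original file):
#
#     >>> import torch
#     >>> op = PRIMITIVES["mb_k5_e2_res"](32, 32, 1, fm_size=14)
#     >>> op(torch.randn(1, 32, 14, 14)).shape
#     torch.Size([1, 32, 14, 14])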
def conv_bn(inp, oup, kernel, stride, pad=1, groups=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernel, stride, pad, groups=groups, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True),
)
class SeparableConv(nn.Module):
"""Separable convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(SeparableConv, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
pad = kernel // 2
self.conv = nn.Sequential(
conv_bn(in_ch, in_ch, kernel, stride, pad=pad, groups=in_ch),
conv_bn(in_ch, out_ch, 1, 1, pad=0),
)
def forward(self, x):
return self.conv(x)
class Conv1x1(nn.Module):
"""1x1 convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=1, fm_size=7):
super(Conv1x1, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
padding = kernel // 2
self.conv = nn.Sequential(
nn.Conv2d(in_ch, out_ch, kernel, stride, padding),
nn.ReLU(inplace=True),
)
def forward(self, x):
return self.conv(x)
class DepthConv(nn.Module):
"""depth convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(DepthConv, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
padding = kernel // 2
self.conv = nn.Sequential(
nn.Conv2d(in_ch, in_ch, kernel, stride, padding, groups=in_ch),
nn.ReLU(inplace=True),
nn.Conv2d(in_ch, out_ch, 1, 1, 0),
nn.ReLU(inplace=True),
)
def forward(self, x):
return self.conv(x)
class GhostModule(nn.Module):
"""Gost module."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(GhostModule, self).__init__()
mid_ch = out_ch // 2
self.primary_conv = conv_bn(in_ch, mid_ch, 1, stride, pad=0)
self.cheap_operation = conv_bn(
mid_ch, mid_ch, kernel, 1, kernel // 2, mid_ch
)
def forward(self, x):
x1 = self.primary_conv(x)
x2 = self.cheap_operation(x1)
return torch.cat([x1, x2], dim=1)
class StemBlock(nn.Module):
def __init__(self, in_ch=3, init_ch=32, bottleneck=True):
super(StemBlock, self).__init__()
self.stem_1 = conv_bn(in_ch, init_ch, 3, 2, 1)
mid_ch = int(init_ch // 2) if bottleneck else init_ch
self.stem_2a = conv_bn(init_ch, mid_ch, 1, 1, 0)
self.stem_2b = SeparableConv(mid_ch, init_ch, 2, 1)
self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2)
self.stem_3 = conv_bn(init_ch * 2, init_ch, 1, 1, 0)
def forward(self, x):
stem_1_out = self.stem_1(x)
stem_2a_out = self.stem_2a(stem_1_out)
stem_2b_out = self.stem_2b(stem_2a_out)
stem_2p_out = self.stem_2p(stem_1_out)
out = self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1))
return out, stem_1_out
class Identity(nn.Module):
""" Identity module."""
def __init__(self, in_ch, out_ch, stride=1, fm_size=7):
super(Identity, self).__init__()
self.conv = (
conv_bn(in_ch, out_ch, kernel=1, stride=stride, pad=0)
if in_ch != out_ch or stride != 1
else None
)
def forward(self, x):
if self.conv:
out = self.conv(x)
else:
out = x
        # Add dropout to avoid overfitting on Identity (P-DARTS trick);
        # gate on self.training so eval/export paths stay deterministic
        out = F.dropout(out, p=0.5, training=self.training)
return out
class Hsigmoid(nn.Module):
"""Hsigmoid activation function."""
def __init__(self, inplace=True):
super(Hsigmoid, self).__init__()
self.inplace = inplace
def forward(self, x):
return F.relu6(x + 3.0, inplace=self.inplace) / 6.0
class eSEModule(nn.Module):
""" The improved SE Module."""
def __init__(self, channel, fm_size=7, se=True):
super(eSEModule, self).__init__()
self.se = se
        if self.se:
            # depthwise conv with kernel == feature-map size, standing in
            # for global average pooling at a fixed input resolution
            self.avg_pool = nn.Conv2d(
                channel, channel, fm_size, 1, 0, groups=channel, bias=False
            )
self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0)
self.hsigmoid = Hsigmoid()
    def forward(self, x):
        if self.se:
            identity = x
            x = self.avg_pool(x)
            x = self.fc(x)
            x = self.hsigmoid(x)
            return identity * x
        else:
            return x
class ChannelShuffle(nn.Module):
"""Procedure: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]."""
def __init__(self, groups):
super(ChannelShuffle, self).__init__()
self.groups = groups
def forward(self, x):
if self.groups == 1:
return x
N, C, H, W = x.size()
g = self.groups
        assert C % g == 0, \
            "group number {} must divide channel count {}".format(g, C)
return (
x.view(N, g, int(C // g), H, W)
.permute(0, 2, 1, 3, 4)
.contiguous()
.view(N, C, H, W)
)
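# The shuffle is a pure channel permutation: shuffling with g groups and then
# with C/g groups restores the original order, which gives a cheap sanity
# check (illustrative, not part of the original file):
#
#     >>> x = torch.arange(8.0).view(1, 8, 1, 1)
#     >>> torch.equal(ChannelShuffle(4)(ChannelShuffle(2)(x)), x)
#     True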
class MBBlock(nn.Module):
"""The Inverted Residual Block, with channel shuffle or eSEModule."""
def __init__(
self,
in_ch,
out_ch,
stride=1,
kernel=3,
expand=1,
res=False,
dilation=1,
se=False,
fm_size=7,
group=1,
mid_ch=-1,
):
super(MBBlock, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
assert kernel in [3, 5], "kernel size should be in [3, 5]"
assert dilation in [1, 2, 3, 4], "dilation should be in [1, 2, 3, 4]"
assert group in [1, 2], "group should be in [1, 2]"
self.use_res_connect = res and (stride == 1)
padding = kernel // 2 + (dilation - 1)
mid_ch = mid_ch if mid_ch > 0 else (in_ch * expand)
# Basic Modules
conv_layer = nn.Conv2d
norm_layer = nn.BatchNorm2d
activation_layer = nn.ReLU
        channel_shuffle = ChannelShuffle
se_layer = eSEModule
self.ir_block = nn.Sequential(
# pointwise convolution
conv_layer(in_ch, mid_ch, 1, 1, 0, bias=False, groups=group),
norm_layer(mid_ch),
activation_layer(inplace=True),
            # channel shuffle if necessary
            channel_shuffle(group),
# depthwise convolution
conv_layer(
mid_ch,
mid_ch,
kernel,
stride,
padding=padding,
dilation=dilation,
groups=mid_ch,
bias=False,
),
norm_layer(mid_ch),
# eSEModule if necessary
se_layer(mid_ch, fm_size, se),
activation_layer(inplace=True),
# pointwise convolution
conv_layer(mid_ch, out_ch, 1, 1, 0, bias=False, groups=group),
norm_layer(out_ch),
)
def forward(self, x):
if self.use_res_connect:
return x + self.ir_block(x)
else:
return self.ir_block(x)
class SingleOperation(nn.Module):
"""Single operation for sampled path."""
def __init__(self, layers_configs, stage_ops, sampled_op=""):
"""
Parameters
----------
layers_configs : list
the layer config: [input_channel, output_channel, stride, height]
stage_ops : dict
the pairs of op name and layer operator
sampled_op : str
the searched layer name
"""
super(SingleOperation, self).__init__()
fm = {"fm_size": layers_configs[3]}
        ops_names = list(stage_ops)
sampled_op = sampled_op if sampled_op else ops_names[0]
# define the single op
self.op = stage_ops[sampled_op](*layers_configs[0:3], **fm)
def forward(self, x):
return self.op(x)
def choice_blocks(layers_configs, stage_ops):
"""
Create list of layer candidates for NNI one-shot NAS.
Parameters
----------
layers_configs : list
the layer config: [input_channel, output_channel, stride, height]
stage_ops : dict
the pairs of op name and layer operator
Returns
-------
output: list
list of layer operators
"""
    ops_names = list(stage_ops)
fm = {"fm_size": layers_configs[3]}
op_list = [stage_ops[op](*layers_configs[0:3], **fm) for op in ops_names]
return op_list
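# ---------------------------------------------------------------------------
# Usage sketch for SingleOperation (illustrative values chosen to match the
# builder tables: channels 32 -> 64, stride 2, 7x7 output; not original code):
import torch

cfg = [32, 64, 2, 7]  # [input_channel, output_channel, stride, fm_size]
stage_ops = {name: PRIMITIVES[name] for name in ("mb_k5_res", "gh_k5")}
layer = SingleOperation(cfg, stage_ops, sampled_op="mb_k5_res")
out = layer(torch.randn(1, 32, 14, 14))
print(out.shape)  # torch.Size([1, 64, 7, 7])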
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
from lib.ops import (
MBBlock,
SeparableConv,
SingleOperation,
StemBlock,
conv_bn,
)
from torch.nn import init
INIT_CH = 16
class PFLDInference(nn.Module):
""" The subnet with the architecture of PFLD. """
def __init__(self, lookup_table, sampled_ops, num_points=106):
"""
Parameters
----------
lookup_table : class
to manage the candidate ops, layer information and layer perf
sampled_ops : list of str
the searched layer names of the subnet
num_points : int
the number of landmarks for prediction
"""
super(PFLDInference, self).__init__()
        stage_names = list(lookup_table.layer_num)
stage_n = [lookup_table.layer_num[stage] for stage in stage_names]
self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False)
self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32)
stages_0 = [
SingleOperation(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[0]],
sampled_ops[layer_id],
)
for layer_id in range(stage_n[0])
]
stages_1 = [
SingleOperation(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[1]],
sampled_ops[layer_id],
)
for layer_id in range(stage_n[0], stage_n[0] + stage_n[1])
]
blocks = stages_0 + stages_1
self.blocks = nn.Sequential(*blocks)
        # depthwise strided convs act as pooling when fusing the three scales
        self.avg_pool1 = nn.Conv2d(
            INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False
        )
        self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False)
self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False)
self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128)
self.block6_3 = SeparableConv(64, 128, 1)
self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False)
self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True)
# init params
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
"""
Parameters
----------
x : tensor
input image
Returns
-------
output: tensor
the predicted landmarks
output: tensor
the intermediate features
"""
x, y1 = self.stem(x)
out1 = x
x = self.block4_1(x)
        for i, block in enumerate(self.blocks):
            x = block(x)
            if i == 1:  # last 14x14 map (end of stage_0)
                y2 = x
            elif i == 4:  # last 7x7 map (end of stage_1)
                y3 = x
y1 = self.avg_pool1(y1)
y2 = self.avg_pool2(y2)
multi_scale = torch.cat([y3, y2, y1], 1)
y = self.block6_1(multi_scale)
y = self.block6_2(y)
y = self.block6_3(y)
y = self.conv7(y)
landmarks = self.fc(y)
return landmarks, out1
class AuxiliaryNet(nn.Module):
""" AuxiliaryNet to predict pose angles. """
def __init__(self):
super(AuxiliaryNet, self).__init__()
self.conv1 = conv_bn(INIT_CH, 64, 3, 2)
self.conv2 = conv_bn(64, 64, 3, 1)
self.conv3 = conv_bn(64, 32, 3, 2)
self.conv4 = conv_bn(32, 64, 7, 1)
self.max_pool1 = nn.MaxPool2d(3)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, 3)
def forward(self, x):
"""
Parameters
----------
x : tensor
input intermediate features
Returns
-------
output: tensor
the predicted pose angles
"""
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.max_pool1(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
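# ---------------------------------------------------------------------------
# Wiring sketch for the searched subnet plus the auxiliary head (assumptions:
# the lookup table is built exactly as in the export script earlier in this
# commit, and sampled_arch is an illustrative pick of one op per layer, the
# first two from stage_0 and the last three from stage_1):
import torch
from lib.builder import search_space
from lib.ops import PRIMITIVES
from lib.subnet import PFLDInference, AuxiliaryNet
from nni.algorithms.nas.pytorch.fbnet import LookUpTable, NASConfig

nas_config = NASConfig(search_space=search_space)
lookup_table = LookUpTable(config=nas_config, primitives=PRIMITIVES)
sampled_arch = ["mb_k3_res", "sep_k3", "mb_k5_res", "gh_k5", "mb_k3_e2_res"]
backbone = PFLDInference(lookup_table, sampled_arch, num_points=106)
aux = AuxiliaryNet()
img = torch.randn(2, 3, 112, 112)
landmarks, feat = backbone(img)  # [2, 212, 1, 1] and [2, 16, 28, 28]
angles = aux(feat)               # [2, 3] predicted pose angles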
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
from lib.ops import (
MBBlock,
SeparableConv,
StemBlock,
choice_blocks,
conv_bn,
)
from nni.nas.pytorch import mutables
from torch.nn import init
INIT_CH = 16
class PFLDInference(nn.Module):
""" PFLD model for facial landmark."""
def __init__(self, lookup_table, num_points=106):
"""
Parameters
----------
lookup_table : class
to manage the candidate ops, layer information and layer perf
num_points : int
the number of landmarks for prediction
"""
super(PFLDInference, self).__init__()
        stage_names = list(lookup_table.layer_num)
stage_lnum = [lookup_table.layer_num[stage] for stage in stage_names]
self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False)
self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32)
stages_0 = [
mutables.LayerChoice(
choice_blocks(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[0]],
)
)
for layer_id in range(stage_lnum[0])
]
stages_1 = [
mutables.LayerChoice(
choice_blocks(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[1]],
)
)
for layer_id in range(stage_lnum[0], stage_lnum[0] + stage_lnum[1])
]
blocks = stages_0 + stages_1
self.blocks = nn.Sequential(*blocks)
        # depthwise strided convs act as pooling when fusing the three scales
        self.avg_pool1 = nn.Conv2d(
            INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False
        )
        self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False)
self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False)
self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128)
self.block6_3 = SeparableConv(64, 128, 1)
self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False)
self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True)
# init params
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
"""
Parameters
----------
x : tensor
input image
Returns
-------
output: tensor
the predicted landmarks
output: tensor
the intermediate features
"""
x, y1 = self.stem(x)
out1 = x
x = self.block4_1(x)
        for i, block in enumerate(self.blocks):
            x = block(x)
            if i == 1:  # last 14x14 map (end of stage_0)
                y2 = x
            elif i == 4:  # last 7x7 map (end of stage_1)
                y3 = x
y1 = self.avg_pool1(y1)
y2 = self.avg_pool2(y2)
multi_scale = torch.cat([y3, y2, y1], 1)
y = self.block6_1(multi_scale)
y = self.block6_2(y)
y = self.block6_3(y)
y = self.conv7(y)
landmarks = self.fc(y)
return landmarks, out1
class AuxiliaryNet(nn.Module):
""" AuxiliaryNet to predict pose angles. """
def __init__(self):
super(AuxiliaryNet, self).__init__()
self.conv1 = conv_bn(INIT_CH, 64, 3, 2)
self.conv2 = conv_bn(64, 64, 3, 1)
self.conv3 = conv_bn(64, 32, 3, 2)
self.conv4 = conv_bn(32, 64, 7, 1)
self.max_pool1 = nn.MaxPool2d(3)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, 3)
def forward(self, x):
"""
Parameters
----------
x : tensor
input intermediate features
Returns
-------
output: tensor
the predicted pose angles
"""
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.max_pool1(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
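# ---------------------------------------------------------------------------
# Construction sketch for the one-shot supernet above, which wraps every
# searchable layer in mutables.LayerChoice; reuses the lookup_table built in
# the subnet sketch. An NNI one-shot trainer (not shown here) then learns
# which choice to keep per layer.
supernet = PFLDInference(lookup_table, num_points=106)
n_searchable = sum(
    1 for m in supernet.modules() if isinstance(m, mutables.LayerChoice)
)
print(n_searchable)  # 5 = stage_0 layer_num (2) + stage_1 layer_num (3)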