Commit 1011377c authored by qianyj

the source code of NNI for DCU

parent abc22158
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import cv2
import os
import numpy as np
from torch.utils import data
class PFLDDatasets(data.Dataset):
""" Dataset to manage the data loading, augmentation and generation. """
    def __init__(self, file_list, transforms=None, data_root="", img_size=112):
        """
        Parameters
        ----------
        file_list : str
            path of the annotation file; each line holds an image path
            followed by the landmark and pose-angle labels
        transforms : callable
            function for data augmentation
        data_root : str
            the root path of the dataset
        img_size : int
            the size of image height or width
        """
self.line = None
self.path = None
self.img_size = img_size
self.land = None
self.angle = None
self.data_root = data_root
self.transforms = transforms
with open(file_list, "r") as f:
self.lines = f.readlines()
def __getitem__(self, index):
""" Get the data sample and labels with the index. """
self.line = self.lines[index].strip().split()
# load image
if self.data_root:
self.img = cv2.imread(os.path.join(self.data_root, self.line[0]))
else:
self.img = cv2.imread(self.line[0])
# resize
self.img = cv2.resize(self.img, (self.img_size, self.img_size))
        # ground-truth labels: 106 (x, y) landmark coords, then pose angles
        self.land = np.asarray(self.line[1: (106 * 2 + 1)], dtype=np.float32)
        self.angle = np.asarray(self.line[(106 * 2 + 1):], dtype=np.float32)
# augmentation
if self.transforms:
self.img = self.transforms(self.img)
return self.img, self.land, self.angle
def __len__(self):
""" Get the size of dataset. """
return len(self.lines)
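# ---------------------------------------------------------------------------
# Usage sketch for PFLDDatasets above (illustrative, not part of the commit).
# It assumes an annotation file whose lines are "<image path> <212 landmark
# values> <pose angles>", matching the parsing in __getitem__, and that
# torchvision is available for the augmentation callable; paths are made up.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([transforms.ToTensor()])  # HWC uint8 -> CHW float
dataset = PFLDDatasets(
    "data/train_list.txt", transforms=transform, data_root="data", img_size=112
)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
imgs, landmarks, angles = next(iter(loader))
# imgs: [32, 3, 112, 112]; landmarks: [32, 212];
# angles: [32, 3] in the usual 3-Euler-angle PFLD setup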
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import argparse
import onnx
import onnxsim
import os
import torch
from lib.builder import search_space
from lib.ops import PRIMITIVES
from nni.algorithms.nas.pytorch.fbnet import (
LookUpTable,
NASConfig,
model_init,
)
parser = argparse.ArgumentParser(description="Export the ONNX model")
parser.add_argument("--net", default="subnet", type=str)
parser.add_argument("--supernet", default="", type=str, metavar="PATH")
parser.add_argument("--resume", default="", type=str, metavar="PATH")
parser.add_argument("--num_points", default=106, type=int)
parser.add_argument("--img_size", default=112, type=int)
parser.add_argument("--onnx", default="./output/pfld.onnx", type=str)
parser.add_argument("--onnx_sim", default="./output/subnet.onnx", type=str)
args = parser.parse_args()
os.makedirs("./output", exist_ok=True)
if args.net == "subnet":
from lib.subnet import PFLDInference
else:
raise ValueError("Network is not implemented")
check = torch.load(args.supernet, map_location=torch.device("cpu"))
sampled_arch = check["arch_sample"]
nas_config = NASConfig(search_space=search_space)
lookup_table = LookUpTable(config=nas_config, primitives=PRIMITIVES)
pfld_backbone = PFLDInference(lookup_table, sampled_arch, args.num_points)
pfld_backbone.eval()
check_sub = torch.load(args.resume, map_location=torch.device("cpu"))
param_dict = check_sub["pfld_backbone"]
model_init(pfld_backbone, param_dict)
print("Convert PyTorch model to ONNX.")
dummy_input = torch.randn(1, 3, args.img_size, args.img_size)
input_names = ["input"]
output_names = ["output"]
torch.onnx.export(
pfld_backbone,
dummy_input,
args.onnx,
verbose=True,
input_names=input_names,
output_names=output_names,
)
print("Check ONNX model.")
model = onnx.load(args.onnx)
print("Simplifying the ONNX model.")
model_opt, check = onnxsim.simplify(args.onnx)
assert check, "Simplified ONNX model could not be validated"
onnx.save(model_opt, args.onnx_sim)
print("Onnx model simplify Ok!")
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
search_space = {
# multi-stage definition for candidate layers
# here two stages are defined for PFLD searching
"stages": {
"stage_0": {
"ops": [
"mb_k3_res",
"mb_k3_e2_res",
"mb_k3_res_d3",
"mb_k5_res",
"mb_k5_e2_res",
"sep_k3",
"sep_k5",
"gh_k3",
"gh_k5",
],
"layer_num": 2,
},
"stage_1": {
"ops": [
"mb_k3_e2_res",
"mb_k3_e4_res",
"mb_k3_e2_res_se",
"mb_k3_res_d3",
"mb_k5_res",
"mb_k5_e2_res",
"mb_k5_res_se",
"mb_k5_e2_res_se",
"gh_k5",
],
"layer_num": 3,
},
},
    # per-layer information required by NAS
    # each input_shape entry is (input_channels, height, width)
"input_shape": [
(32, 14, 14),
(32, 14, 14),
(32, 14, 14),
(64, 7, 7),
(64, 7, 7),
],
# output channels for each layer
"channel_size": [32, 32, 64, 64, 64],
# stride for each layer
"strides": [1, 1, 2, 1, 1],
# height of feature map for each layer
"fm_size": [14, 14, 7, 7, 7],
}
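# ---------------------------------------------------------------------------
# Scale of the space defined above: each layer independently picks one op, so
# the number of candidate subnets is the product of |ops| ** layer_num over
# the stages (a small sketch, not part of the original file):
space_size = 1
for stage in search_space["stages"].values():
    space_size *= len(stage["ops"]) ** stage["layer_num"]
print(space_size)  # 9 ** 2 * 9 ** 3 = 59049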
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
# Basic primitives as the network path
PRIMITIVES = {
"skip": lambda c_in, c_out, stride, **kwargs: Identity(
c_in, c_out, stride, **kwargs
),
"conv1x1": lambda c_in, c_out, stride, **kwargs: Conv1x1(
c_in, c_out, stride, **kwargs
),
"depth_conv": lambda c_in, c_out, stride, **kwargs: DepthConv(
c_in, c_out, stride, **kwargs
),
"sep_k3": lambda c_in, c_out, stride, **kwargs: SeparableConv(
c_in, c_out, stride, **kwargs
),
"sep_k5": lambda c_in, c_out, stride, **kwargs: SeparableConv(
c_in, c_out, stride, kernel=5, **kwargs
),
"gh_k3": lambda c_in, c_out, stride, **kwargs: GhostModule(
c_in, c_out, stride, **kwargs
),
"gh_k5": lambda c_in, c_out, stride, **kwargs: GhostModule(
c_in, c_out, stride, kernel=5, **kwargs
),
"mb_k3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=1, **kwargs
),
"mb_k3_e2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=2, **kwargs
),
"mb_k3_e4": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=4, **kwargs
),
"mb_k3_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=1, res=True, **kwargs
),
"mb_k3_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=2, res=True, **kwargs
),
"mb_k3_e4_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=3, expand=4, res=True, **kwargs
),
"mb_k3_d2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=False,
dilation=2,
**kwargs,
),
"mb_k3_d3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=False,
dilation=3,
**kwargs,
),
"mb_k3_res_d2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=2,
**kwargs,
),
"mb_k3_res_d3": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=3,
**kwargs,
),
"mb_k3_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=1,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k3_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=2,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k3_e4_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=3,
expand=4,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k5": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=1, **kwargs
),
"mb_k5_e2": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=2, **kwargs
),
"mb_k5_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=1, res=True, **kwargs
),
"mb_k5_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in, c_out, stride, kernel=5, expand=2, res=True, **kwargs
),
"mb_k5_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=5,
expand=1,
res=True,
dilation=1,
se=True,
**kwargs,
),
"mb_k5_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock(
c_in,
c_out,
stride,
kernel=5,
expand=2,
res=True,
dilation=1,
se=True,
**kwargs,
),
}
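# Each PRIMITIVES entry is a factory taking (c_in, c_out, stride) plus keyword
# options such as fm_size. A quick shape check on one candidate (a sketch,
# not part of the original file):
#
#     >>> import torch
#     >>> op = PRIMITIVES["mb_k5_e2_res"](32, 32, 1, fm_size=14)
#     >>> op(torch.randn(1, 32, 14, 14)).shape
#     torch.Size([1, 32, 14, 14])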
def conv_bn(inp, oup, kernel, stride, pad=1, groups=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernel, stride, pad, groups=groups, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True),
)
class SeparableConv(nn.Module):
"""Separable convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(SeparableConv, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
pad = kernel // 2
self.conv = nn.Sequential(
conv_bn(in_ch, in_ch, kernel, stride, pad=pad, groups=in_ch),
conv_bn(in_ch, out_ch, 1, 1, pad=0),
)
def forward(self, x):
return self.conv(x)
class Conv1x1(nn.Module):
"""1x1 convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=1, fm_size=7):
super(Conv1x1, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
padding = kernel // 2
self.conv = nn.Sequential(
nn.Conv2d(in_ch, out_ch, kernel, stride, padding),
nn.ReLU(inplace=True),
)
def forward(self, x):
return self.conv(x)
class DepthConv(nn.Module):
"""depth convolution."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(DepthConv, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
padding = kernel // 2
self.conv = nn.Sequential(
nn.Conv2d(in_ch, in_ch, kernel, stride, padding, groups=in_ch),
nn.ReLU(inplace=True),
nn.Conv2d(in_ch, out_ch, 1, 1, 0),
nn.ReLU(inplace=True),
)
def forward(self, x):
return self.conv(x)
class GhostModule(nn.Module):
"""Gost module."""
def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7):
super(GhostModule, self).__init__()
mid_ch = out_ch // 2
self.primary_conv = conv_bn(in_ch, mid_ch, 1, stride, pad=0)
self.cheap_operation = conv_bn(
mid_ch, mid_ch, kernel, 1, kernel // 2, mid_ch
)
def forward(self, x):
x1 = self.primary_conv(x)
x2 = self.cheap_operation(x1)
return torch.cat([x1, x2], dim=1)
class StemBlock(nn.Module):
def __init__(self, in_ch=3, init_ch=32, bottleneck=True):
super(StemBlock, self).__init__()
self.stem_1 = conv_bn(in_ch, init_ch, 3, 2, 1)
mid_ch = int(init_ch // 2) if bottleneck else init_ch
self.stem_2a = conv_bn(init_ch, mid_ch, 1, 1, 0)
self.stem_2b = SeparableConv(mid_ch, init_ch, 2, 1)
self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2)
self.stem_3 = conv_bn(init_ch * 2, init_ch, 1, 1, 0)
def forward(self, x):
stem_1_out = self.stem_1(x)
stem_2a_out = self.stem_2a(stem_1_out)
stem_2b_out = self.stem_2b(stem_2a_out)
stem_2p_out = self.stem_2p(stem_1_out)
out = self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1))
return out, stem_1_out
class Identity(nn.Module):
""" Identity module."""
def __init__(self, in_ch, out_ch, stride=1, fm_size=7):
super(Identity, self).__init__()
self.conv = (
conv_bn(in_ch, out_ch, kernel=1, stride=stride, pad=0)
if in_ch != out_ch or stride != 1
else None
)
def forward(self, x):
if self.conv:
out = self.conv(x)
else:
out = x
        # Add dropout to avoid overfitting on Identity (P-DARTS trick);
        # gate on self.training so eval/export paths stay deterministic
        out = F.dropout(out, p=0.5, training=self.training)
return out
class Hsigmoid(nn.Module):
"""Hsigmoid activation function."""
def __init__(self, inplace=True):
super(Hsigmoid, self).__init__()
self.inplace = inplace
def forward(self, x):
return F.relu6(x + 3.0, inplace=self.inplace) / 6.0
class eSEModule(nn.Module):
""" The improved SE Module."""
def __init__(self, channel, fm_size=7, se=True):
super(eSEModule, self).__init__()
self.se = se
        if self.se:
            # depthwise conv with kernel == feature-map size, standing in
            # for global average pooling at a fixed input resolution
            self.avg_pool = nn.Conv2d(
                channel, channel, fm_size, 1, 0, groups=channel, bias=False
            )
self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0)
self.hsigmoid = Hsigmoid()
    def forward(self, x):
        if self.se:
            identity = x
            x = self.avg_pool(x)
            x = self.fc(x)
            x = self.hsigmoid(x)
            return identity * x
        else:
            return x
class ChannelShuffle(nn.Module):
"""Procedure: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]."""
def __init__(self, groups):
super(ChannelShuffle, self).__init__()
self.groups = groups
def forward(self, x):
if self.groups == 1:
return x
N, C, H, W = x.size()
g = self.groups
        assert C % g == 0, \
            "group number {} must divide channel count {}".format(g, C)
return (
x.view(N, g, int(C // g), H, W)
.permute(0, 2, 1, 3, 4)
.contiguous()
.view(N, C, H, W)
)
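# The shuffle is a pure channel permutation: shuffling with g groups and then
# with C/g groups restores the original order, which gives a cheap sanity
# check (illustrative, not part of the original file):
#
#     >>> x = torch.arange(8.0).view(1, 8, 1, 1)
#     >>> torch.equal(ChannelShuffle(4)(ChannelShuffle(2)(x)), x)
#     True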
class MBBlock(nn.Module):
"""The Inverted Residual Block, with channel shuffle or eSEModule."""
def __init__(
self,
in_ch,
out_ch,
stride=1,
kernel=3,
expand=1,
res=False,
dilation=1,
se=False,
fm_size=7,
group=1,
mid_ch=-1,
):
super(MBBlock, self).__init__()
assert stride in [1, 2], "stride should be in [1, 2]"
assert kernel in [3, 5], "kernel size should be in [3, 5]"
assert dilation in [1, 2, 3, 4], "dilation should be in [1, 2, 3, 4]"
assert group in [1, 2], "group should be in [1, 2]"
self.use_res_connect = res and (stride == 1)
padding = kernel // 2 + (dilation - 1)
mid_ch = mid_ch if mid_ch > 0 else (in_ch * expand)
# Basic Modules
conv_layer = nn.Conv2d
norm_layer = nn.BatchNorm2d
activation_layer = nn.ReLU
        channel_shuffle = ChannelShuffle
se_layer = eSEModule
self.ir_block = nn.Sequential(
# pointwise convolution
conv_layer(in_ch, mid_ch, 1, 1, 0, bias=False, groups=group),
norm_layer(mid_ch),
activation_layer(inplace=True),
            # channel shuffle if necessary
            channel_shuffle(group),
# depthwise convolution
conv_layer(
mid_ch,
mid_ch,
kernel,
stride,
padding=padding,
dilation=dilation,
groups=mid_ch,
bias=False,
),
norm_layer(mid_ch),
# eSEModule if necessary
se_layer(mid_ch, fm_size, se),
activation_layer(inplace=True),
# pointwise convolution
conv_layer(mid_ch, out_ch, 1, 1, 0, bias=False, groups=group),
norm_layer(out_ch),
)
def forward(self, x):
if self.use_res_connect:
return x + self.ir_block(x)
else:
return self.ir_block(x)
class SingleOperation(nn.Module):
"""Single operation for sampled path."""
def __init__(self, layers_configs, stage_ops, sampled_op=""):
"""
Parameters
----------
layers_configs : list
the layer config: [input_channel, output_channel, stride, height]
stage_ops : dict
the pairs of op name and layer operator
sampled_op : str
the searched layer name
"""
super(SingleOperation, self).__init__()
fm = {"fm_size": layers_configs[3]}
        ops_names = list(stage_ops)
sampled_op = sampled_op if sampled_op else ops_names[0]
# define the single op
self.op = stage_ops[sampled_op](*layers_configs[0:3], **fm)
def forward(self, x):
return self.op(x)
def choice_blocks(layers_configs, stage_ops):
"""
Create list of layer candidates for NNI one-shot NAS.
Parameters
----------
layers_configs : list
the layer config: [input_channel, output_channel, stride, height]
stage_ops : dict
the pairs of op name and layer operator
Returns
-------
output: list
list of layer operators
"""
    ops_names = list(stage_ops)
fm = {"fm_size": layers_configs[3]}
op_list = [stage_ops[op](*layers_configs[0:3], **fm) for op in ops_names]
return op_list
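# ---------------------------------------------------------------------------
# Usage sketch for SingleOperation (illustrative values chosen to match the
# builder tables: channels 32 -> 64, stride 2, 7x7 output; not original code):
import torch

cfg = [32, 64, 2, 7]  # [input_channel, output_channel, stride, fm_size]
stage_ops = {name: PRIMITIVES[name] for name in ("mb_k5_res", "gh_k5")}
layer = SingleOperation(cfg, stage_ops, sampled_op="mb_k5_res")
out = layer(torch.randn(1, 32, 14, 14))
print(out.shape)  # torch.Size([1, 64, 7, 7])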
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
from lib.ops import (
MBBlock,
SeparableConv,
SingleOperation,
StemBlock,
conv_bn,
)
from torch.nn import init
INIT_CH = 16
class PFLDInference(nn.Module):
""" The subnet with the architecture of PFLD. """
def __init__(self, lookup_table, sampled_ops, num_points=106):
"""
Parameters
----------
lookup_table : class
to manage the candidate ops, layer information and layer perf
sampled_ops : list of str
the searched layer names of the subnet
num_points : int
the number of landmarks for prediction
"""
super(PFLDInference, self).__init__()
        stage_names = list(lookup_table.layer_num)
stage_n = [lookup_table.layer_num[stage] for stage in stage_names]
self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False)
self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32)
stages_0 = [
SingleOperation(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[0]],
sampled_ops[layer_id],
)
for layer_id in range(stage_n[0])
]
stages_1 = [
SingleOperation(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[1]],
sampled_ops[layer_id],
)
for layer_id in range(stage_n[0], stage_n[0] + stage_n[1])
]
blocks = stages_0 + stages_1
self.blocks = nn.Sequential(*blocks)
        # depthwise strided convs act as pooling when fusing the three scales
        self.avg_pool1 = nn.Conv2d(
            INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False
        )
        self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False)
self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False)
self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128)
self.block6_3 = SeparableConv(64, 128, 1)
self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False)
self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True)
# init params
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
"""
Parameters
----------
x : tensor
input image
Returns
-------
output: tensor
the predicted landmarks
output: tensor
the intermediate features
"""
x, y1 = self.stem(x)
out1 = x
x = self.block4_1(x)
        for i, block in enumerate(self.blocks):
            x = block(x)
            if i == 1:  # last 14x14 map (end of stage_0)
                y2 = x
            elif i == 4:  # last 7x7 map (end of stage_1)
                y3 = x
y1 = self.avg_pool1(y1)
y2 = self.avg_pool2(y2)
multi_scale = torch.cat([y3, y2, y1], 1)
y = self.block6_1(multi_scale)
y = self.block6_2(y)
y = self.block6_3(y)
y = self.conv7(y)
landmarks = self.fc(y)
return landmarks, out1
class AuxiliaryNet(nn.Module):
""" AuxiliaryNet to predict pose angles. """
def __init__(self):
super(AuxiliaryNet, self).__init__()
self.conv1 = conv_bn(INIT_CH, 64, 3, 2)
self.conv2 = conv_bn(64, 64, 3, 1)
self.conv3 = conv_bn(64, 32, 3, 2)
self.conv4 = conv_bn(32, 64, 7, 1)
self.max_pool1 = nn.MaxPool2d(3)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, 3)
def forward(self, x):
"""
Parameters
----------
x : tensor
input intermediate features
Returns
-------
output: tensor
the predicted pose angles
"""
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.max_pool1(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
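# ---------------------------------------------------------------------------
# Wiring sketch for the searched subnet plus the auxiliary head (assumptions:
# the lookup table is built exactly as in the export script earlier in this
# commit, and sampled_arch is an illustrative pick of one op per layer, the
# first two from stage_0 and the last three from stage_1):
import torch
from lib.builder import search_space
from lib.ops import PRIMITIVES
from lib.subnet import PFLDInference, AuxiliaryNet
from nni.algorithms.nas.pytorch.fbnet import LookUpTable, NASConfig

nas_config = NASConfig(search_space=search_space)
lookup_table = LookUpTable(config=nas_config, primitives=PRIMITIVES)
sampled_arch = ["mb_k3_res", "sep_k3", "mb_k5_res", "gh_k5", "mb_k3_e2_res"]
backbone = PFLDInference(lookup_table, sampled_arch, num_points=106)
aux = AuxiliaryNet()
img = torch.randn(2, 3, 112, 112)
landmarks, feat = backbone(img)  # [2, 212, 1, 1] and [2, 16, 28, 28]
angles = aux(feat)               # [2, 3] predicted pose angles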
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
from lib.ops import (
MBBlock,
SeparableConv,
StemBlock,
choice_blocks,
conv_bn,
)
from nni.nas.pytorch import mutables
from torch.nn import init
INIT_CH = 16
class PFLDInference(nn.Module):
""" PFLD model for facial landmark."""
def __init__(self, lookup_table, num_points=106):
"""
Parameters
----------
lookup_table : class
to manage the candidate ops, layer information and layer perf
num_points : int
the number of landmarks for prediction
"""
super(PFLDInference, self).__init__()
        stage_names = list(lookup_table.layer_num)
stage_lnum = [lookup_table.layer_num[stage] for stage in stage_names]
self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False)
self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32)
stages_0 = [
mutables.LayerChoice(
choice_blocks(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[0]],
)
)
for layer_id in range(stage_lnum[0])
]
stages_1 = [
mutables.LayerChoice(
choice_blocks(
lookup_table.layer_configs[layer_id],
lookup_table.lut_ops[stage_names[1]],
)
)
for layer_id in range(stage_lnum[0], stage_lnum[0] + stage_lnum[1])
]
blocks = stages_0 + stages_1
self.blocks = nn.Sequential(*blocks)
        # depthwise strided convs act as pooling when fusing the three scales
        self.avg_pool1 = nn.Conv2d(
            INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False
        )
        self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False)
self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False)
self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128)
self.block6_3 = SeparableConv(64, 128, 1)
self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False)
self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True)
# init params
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
"""
Parameters
----------
x : tensor
input image
Returns
-------
output: tensor
the predicted landmarks
output: tensor
the intermediate features
"""
x, y1 = self.stem(x)
out1 = x
x = self.block4_1(x)
        for i, block in enumerate(self.blocks):
            x = block(x)
            if i == 1:  # last 14x14 map (end of stage_0)
                y2 = x
            elif i == 4:  # last 7x7 map (end of stage_1)
                y3 = x
y1 = self.avg_pool1(y1)
y2 = self.avg_pool2(y2)
multi_scale = torch.cat([y3, y2, y1], 1)
y = self.block6_1(multi_scale)
y = self.block6_2(y)
y = self.block6_3(y)
y = self.conv7(y)
landmarks = self.fc(y)
return landmarks, out1
class AuxiliaryNet(nn.Module):
""" AuxiliaryNet to predict pose angles. """
def __init__(self):
super(AuxiliaryNet, self).__init__()
self.conv1 = conv_bn(INIT_CH, 64, 3, 2)
self.conv2 = conv_bn(64, 64, 3, 1)
self.conv3 = conv_bn(64, 32, 3, 2)
self.conv4 = conv_bn(32, 64, 7, 1)
self.max_pool1 = nn.MaxPool2d(3)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, 3)
def forward(self, x):
"""
Parameters
----------
x : tensor
input intermediate features
Returns
-------
output: tensor
the predicted pose angles
"""
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.max_pool1(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
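# ---------------------------------------------------------------------------
# Construction sketch for the one-shot supernet above, which wraps every
# searchable layer in mutables.LayerChoice; reuses the lookup_table built in
# the subnet sketch. An NNI one-shot trainer (not shown here) then learns
# which choice to keep per layer.
supernet = PFLDInference(lookup_table, num_points=106)
n_searchable = sum(
    1 for m in supernet.modules() if isinstance(m, mutables.LayerChoice)
)
print(n_searchable)  # 5 = stage_0 layer_num (2) + stage_1 layer_num (3)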