# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn
from paddle.nn import functional as F
from paddle import ParamAttr

__all__ = ["ResNet"]


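# A PaddlePaddle implementation of the ResNet-vd backbone for text detection.
# The "vd" variant (from "Bag of Tricks for Image Classification with
# Convolutional Neural Networks") differs from vanilla ResNet in two ways:
# a deep stem of three 3x3 convs replaces the single 7x7 conv, and the
# downsampling shortcut average-pools before its 1x1 conv (ConvBNLayerNew).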
class ResNet(nn.Layer):
    def __init__(self, in_channels=3, layers=50, **kwargs):
        """
        the Resnet backbone network for detection module.
        Args:
            params(dict): the super parameters for network build
        """
        super(ResNet, self).__init__()
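        # Number of residual blocks per stage and the block type for each
        # supported depth: BasicBlock for ResNet-18/34, BottleneckBlock for
        # ResNet-50 and deeper.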
        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            },
            200: {
                'depth': [3, 12, 48, 3],
                'block_class': BottleneckBlock
            }
        }
        assert layers in supported_layers, \
            "supported layers are {} but the input layer is {}".format(
                list(supported_layers.keys()), layers)
        # Use the ResNet-vd deep stem: three 3x3 convs instead of one 7x7.
        is_3x3 = True

        depth = supported_layers[layers]['depth']
        block_class = supported_layers[layers]['block_class']

        num_filters = [64, 128, 256, 512]

        conv = []
        if not is_3x3:
            # A name is required: ConvBNLayer derives parameter names from it.
            conv.append(
                ConvBNLayer(
                    in_channels=in_channels,
                    out_channels=64,
                    kernel_size=7,
                    stride=2,
                    act='relu',
                    name='conv1'))
        else:
            conv.append(
                ConvBNLayer(
                    in_channels=in_channels,
                    out_channels=32,
                    kernel_size=3,
                    stride=2,
                    act='relu',
                    name='conv1_1'))
            conv.append(
                ConvBNLayer(
                    in_channels=32,
                    out_channels=32,
                    kernel_size=3,
                    stride=1,
                    act='relu',
                    name='conv1_2'))
            conv.append(
                ConvBNLayer(
                    in_channels=32,
                    out_channels=64,
                    kernel_size=3,
                    stride=1,
                    act='relu',
                    name='conv1_3'))
        self.conv1 = nn.Sequential(*conv)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stages = []
        self.out_channels = []
        in_ch = 64
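        # Build the four residual stages. The first block of every stage
        # except the first downsamples its input by 2.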
        for block_index in range(len(depth)):
            block_list = []
            for i in range(depth[block_index]):
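                # Block names follow the conventional ResNet parameter naming
                # (e.g. "res2a", "res5b3"), presumably so that released
                # pretrained weights can be loaded by name.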
                if layers >= 50:
                    if layers in [101, 152, 200] and block_index == 2:
                        if i == 0:
                            conv_name = "res" + str(block_index + 2) + "a"
                        else:
                            conv_name = "res" + str(block_index +
                                                    2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block_index + 2) + chr(97 + i)
                else:
                    conv_name = "res" + str(block_index + 2) + chr(97 + i)
                block_list.append(
                    block_class(
                        in_channels=in_ch,
                        out_channels=num_filters[block_index],
                        stride=2 if i == 0 and block_index != 0 else 1,
                        if_first=block_index == i == 0,
                        name=conv_name))
                in_ch = block_list[-1].out_channels
            self.out_channels.append(in_ch)
            self.stages.append(nn.Sequential(*block_list))
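        # self.stages is a plain Python list, so each stage must be registered
        # as a sublayer for its parameters to be tracked by the framework.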
        for i, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        out_list = []
        for stage in self.stages:
            x = stage(x)
            out_list.append(x)
        return out_list


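# Conv2d followed by BatchNorm with an optional fused activation. The conv
# bias is disabled because BatchNorm supplies the affine offset.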
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


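# The ResNet-vd downsampling variant of ConvBNLayer: a 2x2 average pool with
# stride 2 followed by a stride-1 conv, so no activations are skipped the way
# a strided 1x1 conv would skip them.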
class ConvBNLayerNew(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayerNew, self).__init__()
        self.pool = nn.AvgPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)

        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def forward(self, x):
        x = self.pool(x)
        x = self.conv(x)
        x = self.bn(x)
        return x


class ShortCut(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first=False):
        super(ShortCut, self).__init__()
        self.use_conv = True
        if if_first:
            # Blocks in the first stage use a plain 1x1 projection.
            self.conv = ConvBNLayer(
                in_channels, out_channels, 1, stride, name=name)
        elif in_channels != out_channels or stride != 1:
            # ResNet-vd shortcut: avg-pool then a stride-1 1x1 conv, instead
            # of a strided 1x1 conv that would drop 3/4 of the activations.
            self.conv = ConvBNLayerNew(
                in_channels, out_channels, 1, stride, name=name)
        else:
            # Shapes already match: identity shortcut.
            self.use_conv = False

    def forward(self, x):
        if self.use_conv:
            x = self.conv(x)
        return x


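# Standard bottleneck residual block (1x1 reduce, 3x3, 1x1 expand); the block
# output has four times the channel count of its middle convolutions.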
class BottleneckBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels * 4,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = out_channels * 4

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = self.conv2(y)
        y = y + self.short(x)
        y = F.relu(y)
        return y


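# Basic residual block with two 3x3 convolutions, used by ResNet-18/34.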
class BasicBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BasicBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = y + self.short(x)
        return F.relu(y)


if __name__ == '__main__':
    import paddle

    paddle.disable_static()
    x = paddle.zeros([1, 3, 640, 640])
    print(x.shape)
    net = ResNet(layers=18)
    y = net(x)
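    # The stages run at strides 4, 8, 16 and 32, so a 640x640 input should
    # yield feature maps of spatial size 160, 80, 40 and 20.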

    for stage in y:
        print(stage.shape)
    # paddle.save(net.state_dict(),'1.pth')