# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

WenmuZhou's avatar
WenmuZhou committed
19
20
from paddle import nn, ParamAttr
from paddle.nn import functional as F
LDOUBLEV's avatar
LDOUBLEV committed
21

WenmuZhou's avatar
WenmuZhou committed
22
__all__ = ["ResNet"]
LDOUBLEV's avatar
LDOUBLEV committed
23
24


WenmuZhou's avatar
WenmuZhou committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class ResNet(nn.Layer):
    """Convolutional backbone assembled from a 3-conv stem and residual stages.

    The network is: three 3x3 ``ConvBNLayer`` stem convolutions
    (``conv1_1`` .. ``conv1_3``), a 3x3 max-pool with stride 2, four stages of
    residual blocks, and a final 2x2 max-pool with stride 2.

    Depths 18 and 34 use ``BasicBlock``; depths 50, 101, 152 and 200 use
    ``BottleneckBlock``. The first block of every stage except the first is
    built with stride ``(2, 1)``; all other blocks use ``(1, 1)``.

    Args:
        in_channels: Number of channels of the input passed to the stem.
        layers: Network depth; one of 18, 34, 50, 101, 152, 200.

    Attributes:
        out_channels: ``out_channels`` reported by the last residual block
            (the last stage's filter count, times 4 for bottleneck blocks).

    Raises:
        AssertionError: If ``layers`` is not a supported depth.
    """

    def __init__(self, in_channels=3, layers=34):
        super(ResNet, self).__init__()
        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            },
            200: {
                'depth': [3, 12, 48, 3],
                'block_class': BottleneckBlock
            }
        }
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)

        num_filters = [64, 128, 256, 512]
        depth = supported_layers[layers]['depth']
        block_class = supported_layers[layers]['block_class']

        # Stem: (input channels, output channels, parameter-name prefix) for
        # each of the three stride-1 3x3 convolutions.
        stem_spec = [
            (in_channels, 32, 'conv1_1'),
            (32, 32, 'conv1_2'),
            (32, 64, 'conv1_3'),
        ]
        self.conv1 = nn.Sequential(*[
            ConvBNLayer(
                in_channels=stem_in,
                out_channels=stem_out,
                kernel_size=3,
                stride=1,
                act='relu',
                name=stem_name) for stem_in, stem_out, stem_name in stem_spec
        ])

        self.pool = nn.MaxPool2d(
            kernel_size=3,
            stride=2,
            padding=1, )

        block_list = []
        in_ch = 64  # channel count produced by the stem
        for block_index, (num_blocks, stage_filters) in enumerate(
                zip(depth, num_filters)):
            for i in range(num_blocks):
                starts_stage = i == 0
                # Only the opening block of stages 1..3 uses the (2, 1) stride.
                if starts_stage and block_index != 0:
                    stride = (2, 1)
                else:
                    stride = (1, 1)
                block = block_class(
                    in_channels=in_ch,
                    out_channels=stage_filters,
                    stride=stride,
                    if_first=starts_stage and block_index == 0,
                    name=self._residual_block_name(layers, block_index, i))
                block_list.append(block)
                # Each block reports its own output width (x4 for bottlenecks).
                in_ch = block.out_channels
        self.block_list = nn.Sequential(*block_list)
        # NOTE(review): the attribute assignment above presumably already
        # registers this sublayer; the explicit call is kept unchanged.
        self.add_sublayer(sublayer=self.block_list, name="block_list")
        self.pool_out = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = in_ch

    @staticmethod
    def _residual_block_name(layers, block_index, i):
        """Return the parameter-name prefix for block ``i`` of a stage.

        Stage index 2 of the 101/152/200-layer variants is named ``res4a``,
        ``res4b1``, ``res4b2``, ...; every other stage uses consecutive
        letters: ``res<N>a``, ``res<N>b``, ...
        """
        prefix = "res" + str(block_index + 2)
        if layers in [101, 152, 200] and block_index == 2:
            return prefix + "a" if i == 0 else prefix + "b" + str(i)
        return prefix + chr(97 + i)

    def forward(self, x):
        """Run stem, max-pool, residual stages and the final max-pool on ``x``."""
        x = self.conv1(x)
        x = self.pool(x)
        x = self.block_list(x)
        x = self.pool_out(x)
        return x
LDOUBLEV's avatar
LDOUBLEV committed
140
141


WenmuZhou's avatar
WenmuZhou committed
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    The convolution pads by ``(kernel_size - 1) // 2``. The optional
    activation is not applied by this class directly; ``act`` is handed to
    ``nn.BatchNorm``.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution and the number
            of channels normalized by the batch norm.
        kernel_size: Convolution kernel size; used in integer arithmetic to
            derive the padding.
        stride: Convolution stride.
        groups: Convolution group count.
        act: Activation name forwarded to ``nn.BatchNorm`` (``None`` for none).
        name: Prefix for the explicit parameter names. The conv weight is
            named ``name + "_weights"``. The batch-norm prefix is
            ``"bn_" + name`` when ``name == "conv1"``, otherwise
            ``"bn" + name[3:]``. When ``None``, no explicit names are set.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        def with_suffix(prefix, suffix):
            # Leave the attribute unnamed when no prefix was supplied instead
            # of failing on ``None + str``.
            return None if prefix is None else prefix + suffix

        if name is None:
            bn_name = None
        elif name == "conv1":
            bn_name = "bn_" + name
        else:
            # Drops the first three characters (e.g. the "res" of "res2a_...").
            # For names like "conv1_1" this yields "bnv1_1"; left as is because
            # these strings become parameter names.
            # NOTE(review): saved weights presumably depend on them — confirm
            # before changing.
            bn_name = "bn" + name[3:]

        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=with_suffix(name, "_weights")),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=with_suffix(bn_name, "_scale")),
            bias_attr=ParamAttr(name=with_suffix(bn_name, "_offset")),
            moving_mean_name=with_suffix(bn_name, "_mean"),
            moving_variance_name=with_suffix(bn_name, "_variance"))

    def forward(self, x):
        """Apply the convolution, then batch norm (with its activation)."""
        # Defined as ``forward`` rather than ``__call__`` so that calling the
        # layer goes through the base ``Layer.__call__``.
        x = self.conv(x)
        x = self.bn(x)
        return x
LDOUBLEV's avatar
LDOUBLEV committed
177
178


WenmuZhou's avatar
WenmuZhou committed
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
class ConvBNLayerNew(nn.Layer):
    """Average-pool, then bias-free stride-1 convolution, then batch norm.

    ``stride`` is used as both kernel size and stride of the average pool
    (``padding=0``, ``ceil_mode=True``); the convolution itself always runs
    with stride 1. The optional activation is handed to ``nn.BatchNorm``.

    Args:
        in_channels: Input channel count of the convolution.
        out_channels: Output channel count of the convolution and the number
            of channels normalized by the batch norm.
        kernel_size: Convolution kernel size; used in integer arithmetic to
            derive the padding ``(kernel_size - 1) // 2``.
        stride: Kernel size and stride of the average pool.
        groups: Convolution group count.
        act: Activation name forwarded to ``nn.BatchNorm`` (``None`` for none).
        name: Prefix for the explicit parameter names. The conv weight is
            named ``name + "_weights"``. The batch-norm prefix is
            ``"bn_" + name`` when ``name == "conv1"``, otherwise
            ``"bn" + name[3:]``. When ``None``, no explicit names are set.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayerNew, self).__init__()

        def with_suffix(prefix, suffix):
            # Leave the attribute unnamed when no prefix was supplied instead
            # of failing on ``None + str``.
            return None if prefix is None else prefix + suffix

        if name is None:
            bn_name = None
        elif name == "conv1":
            bn_name = "bn_" + name
        else:
            # Drops the first three characters (e.g. the "res" of "res2a_...").
            # NOTE(review): these strings become parameter names; saved
            # weights presumably depend on them — confirm before changing.
            bn_name = "bn" + name[3:]

        self.pool = nn.AvgPool2d(
            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)

        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=with_suffix(name, "_weights")),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=with_suffix(bn_name, "_scale")),
            bias_attr=ParamAttr(name=with_suffix(bn_name, "_offset")),
            moving_mean_name=with_suffix(bn_name, "_mean"),
            moving_variance_name=with_suffix(bn_name, "_variance"))

    def forward(self, x):
        """Apply average pool, convolution, then batch norm (with its activation)."""
        # Defined as ``forward`` rather than ``__call__`` so that calling the
        # layer goes through the base ``Layer.__call__``.
        x = self.pool(x)
        x = self.conv(x)
        x = self.bn(x)
        return x


class ShortCut(nn.Layer):
    """Skip-connection branch of a residual block.

    Selection of the branch:
      * ``if_first`` is true: a 1x1 ``ConvBNLayer`` with the given stride.
      * otherwise, if the channel counts differ or ``stride[0] != 1``:
        a 1x1 ``ConvBNLayerNew`` with the given stride.
      * otherwise: no layer is created and the input is returned unchanged.

    Args:
        in_channels: Channel count of the incoming tensor.
        out_channels: Channel count the branch must produce.
        stride: Indexable stride; only its first element is inspected here,
            the whole value is forwarded to the projection layer.
        name: Parameter-name prefix forwarded to the projection layer.
        if_first: Whether this shortcut belongs to the network's first block.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first=False):
        super(ShortCut, self).__init__()
        # Evaluated unconditionally so ``stride`` is indexed exactly when the
        # channel counts match, regardless of ``if_first``.
        shape_changes = in_channels != out_channels or stride[0] != 1

        if if_first:
            projection_cls = ConvBNLayer
        elif shape_changes:
            projection_cls = ConvBNLayerNew
        else:
            projection_cls = None

        self.use_conv = projection_cls is not None
        if self.use_conv:
            self.conv = projection_cls(
                in_channels, out_channels, 1, stride, name=name)

    def forward(self, x):
        """Return the projected input, or ``x`` itself when no projection exists."""
        return self.conv(x) if self.use_conv else x


class BottleneckBlock(nn.Layer):
    """Residual block made of 1x1, 3x3 and 1x1 conv-BN layers plus a shortcut.

    The first two convolutions produce ``out_channels`` channels and use
    ReLU; the last one widens to ``out_channels * 4`` with no activation.
    ``stride`` is applied by the 3x3 convolution and by the shortcut. ReLU is
    applied after the main path and shortcut are summed.

    Args:
        in_channels: Channel count of the block input.
        out_channels: Inner width of the block; the output has 4x this many.
        stride: Stride for the 3x3 convolution and the shortcut.
        name: Prefix for parameter names (``_branch2a``/``_branch2b``/
            ``_branch2c`` for the main path, ``_branch1`` for the shortcut).
        if_first: Forwarded to ``ShortCut``.

    Attributes:
        out_channels: ``out_channels * 4``.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BottleneckBlock, self).__init__()
        widened = out_channels * 4

        # Main path: reduce -> spatial conv (carries the stride) -> expand.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=widened,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        # Skip path, matched to the widened output.
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=widened,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = widened

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.conv2(self.conv1(self.conv0(x)))
        return F.relu(main + self.short(x))
LDOUBLEV's avatar
LDOUBLEV committed
282

WenmuZhou's avatar
WenmuZhou committed
283
284
285
286
287
288
289
290

class BasicBlock(nn.Layer):
    """Residual block made of two 3x3 conv-BN layers plus a shortcut.

    The first convolution applies ``stride`` and ReLU; the second uses the
    default stride and no activation. ReLU is applied after the main path and
    shortcut are summed.

    Args:
        in_channels: Channel count of the block input.
        out_channels: Channel count of the block output.
        stride: Stride for the first convolution and the shortcut.
        name: Prefix for parameter names (``_branch2a``/``_branch2b`` for the
            main path, ``_branch1`` for the shortcut).
        if_first: Forwarded to ``ShortCut``.

    Attributes:
        out_channels: Same value as the ``out_channels`` argument.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BasicBlock, self).__init__()

        # Main path: strided 3x3 conv with ReLU, then a plain 3x3 conv.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        # Skip path.
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.conv1(self.conv0(x))
        summed = main + self.short(x)
        return F.relu(summed)