"PyTorch/NLP/new-Transformer/CODE_OF_CONDUCT.md" did not exist on "0fc002dfc863089e33ea2dee33b0827046e4d174"
Commit 721c76b4 authored by LDOUBLEV's avatar LDOUBLEV
Browse files

fix conflict

parents 98162be4 b77f9ec0
# -*- coding:utf-8 -*- # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
# -*- coding:utf-8 -*- # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -12,12 +24,8 @@ from shapely.geometry import Polygon ...@@ -12,12 +24,8 @@ from shapely.geometry import Polygon
__all__ = ['MakePseGt'] __all__ = ['MakePseGt']
class MakePseGt(object):
r'''
Making binary mask from detection data with ICDAR format.
Typically following the process of class `MakeICDARData`.
'''
class MakePseGt(object):
def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs): def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs):
self.kernel_num = kernel_num self.kernel_num = kernel_num
self.min_shrink_ratio = min_shrink_ratio self.min_shrink_ratio = min_shrink_ratio
...@@ -38,16 +46,20 @@ class MakePseGt(object): ...@@ -38,16 +46,20 @@ class MakePseGt(object):
text_polys *= scale text_polys *= scale
gt_kernels = [] gt_kernels = []
for i in range(1,self.kernel_num+1): for i in range(1, self.kernel_num + 1):
# s1->sn, from big to small # s1->sn, from big to small
rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1
text_kernel, ignore_tags = self.generate_kernel(image.shape[0:2], rate, text_polys, ignore_tags) ) * i
text_kernel, ignore_tags = self.generate_kernel(
image.shape[0:2], rate, text_polys, ignore_tags)
gt_kernels.append(text_kernel) gt_kernels.append(text_kernel)
training_mask = np.ones(image.shape[0:2], dtype='uint8') training_mask = np.ones(image.shape[0:2], dtype='uint8')
for i in range(text_polys.shape[0]): for i in range(text_polys.shape[0]):
if ignore_tags[i]: if ignore_tags[i]:
cv2.fillPoly(training_mask, text_polys[i].astype(np.int32)[np.newaxis, :, :], 0) cv2.fillPoly(training_mask,
text_polys[i].astype(np.int32)[np.newaxis, :, :],
0)
gt_kernels = np.array(gt_kernels) gt_kernels = np.array(gt_kernels)
gt_kernels[gt_kernels > 0] = 1 gt_kernels[gt_kernels > 0] = 1
...@@ -59,16 +71,25 @@ class MakePseGt(object): ...@@ -59,16 +71,25 @@ class MakePseGt(object):
data['mask'] = training_mask.astype('float32') data['mask'] = training_mask.astype('float32')
return data return data
def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None): def generate_kernel(self,
img_size,
shrink_ratio,
text_polys,
ignore_tags=None):
"""
Refer to part of the code:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
"""
h, w = img_size h, w = img_size
text_kernel = np.zeros((h, w), dtype=np.float32) text_kernel = np.zeros((h, w), dtype=np.float32)
for i, poly in enumerate(text_polys): for i, poly in enumerate(text_polys):
polygon = Polygon(poly) polygon = Polygon(poly)
distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (polygon.length + 1e-6) distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (
polygon.length + 1e-6)
subject = [tuple(l) for l in poly] subject = [tuple(l) for l in poly]
pco = pyclipper.PyclipperOffset() pco = pyclipper.PyclipperOffset()
pco.AddPath(subject, pyclipper.JT_ROUND, pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
pyclipper.ET_CLOSEDPOLYGON)
shrinked = np.array(pco.Execute(-distance)) shrinked = np.array(pco.Execute(-distance))
if len(shrinked) == 0 or shrinked.size == 0: if len(shrinked) == 0 or shrinked.size == 0:
......
# -*- coding:utf-8 -*- # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
# -*- coding:utf-8 -*- # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
...@@ -87,17 +87,17 @@ class RecResizeImg(object): ...@@ -87,17 +87,17 @@ class RecResizeImg(object):
def __init__(self, def __init__(self,
image_shape, image_shape,
infer_mode=False, infer_mode=False,
character_type='ch', character_dict_path='./ppocr/utils/ppocr_keys_v1.txt',
padding=True, padding=True,
**kwargs): **kwargs):
self.image_shape = image_shape self.image_shape = image_shape
self.infer_mode = infer_mode self.infer_mode = infer_mode
self.character_type = character_type self.character_dict_path = character_dict_path
self.padding = padding self.padding = padding
def __call__(self, data): def __call__(self, data):
img = data['image'] img = data['image']
if self.infer_mode and self.character_type == "ch": if self.infer_mode and self.character_dict_path is not None:
norm_img = resize_norm_img_chinese(img, self.image_shape) norm_img = resize_norm_img_chinese(img, self.image_shape)
else: else:
norm_img = resize_norm_img(img, self.image_shape, self.padding) norm_img = resize_norm_img(img, self.image_shape, self.padding)
......
...@@ -11,7 +11,10 @@ ...@@ -11,7 +11,10 @@
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
"""
This part code is refered from:
https://github.com/songdejia/EAST/blob/master/data_utils.py
"""
import math import math
import cv2 import cv2
import numpy as np import numpy as np
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This code is refer from:
https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py
"""
import numpy as np import numpy as np
from .warp_mls import WarpMLS from .warp_mls import WarpMLS
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This code is refer from:
https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py
"""
import numpy as np import numpy as np
...@@ -161,4 +165,4 @@ class WarpMLS: ...@@ -161,4 +165,4 @@ class WarpMLS:
dst = np.clip(dst, 0, 255) dst = np.clip(dst, 0, 255)
dst = np.array(dst, dtype=np.uint8) dst = np.array(dst, dtype=np.uint8)
return dst return dst
\ No newline at end of file
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This code is refer from: https://github.com/viig99/LS-ACELoss
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -32,6 +35,7 @@ class ACELoss(nn.Layer): ...@@ -32,6 +35,7 @@ class ACELoss(nn.Layer):
def __call__(self, predicts, batch): def __call__(self, predicts, batch):
if isinstance(predicts, (list, tuple)): if isinstance(predicts, (list, tuple)):
predicts = predicts[-1] predicts = predicts[-1]
B, N = predicts.shape[:2] B, N = predicts.shape[:2]
div = paddle.to_tensor([N]).astype('float32') div = paddle.to_tensor([N]).astype('float32')
...@@ -42,9 +46,7 @@ class ACELoss(nn.Layer): ...@@ -42,9 +46,7 @@ class ACELoss(nn.Layer):
length = batch[2].astype("float32") length = batch[2].astype("float32")
batch = batch[3].astype("float32") batch = batch[3].astype("float32")
batch[:, 0] = paddle.subtract(div, length) batch[:, 0] = paddle.subtract(div, length)
batch = paddle.divide(batch, div) batch = paddle.divide(batch, div)
loss = self.loss_func(aggregation_preds, batch) loss = self.loss_func(aggregation_preds, batch)
return {"loss_ace": loss} return {"loss_ace": loss}
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -28,22 +30,17 @@ class CenterLoss(nn.Layer): ...@@ -28,22 +30,17 @@ class CenterLoss(nn.Layer):
Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
""" """
def __init__(self, def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None):
num_classes=6625,
feat_dim=96,
init_center=False,
center_file_path=None):
super().__init__() super().__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.feat_dim = feat_dim self.feat_dim = feat_dim
self.centers = paddle.randn( self.centers = paddle.randn(
shape=[self.num_classes, self.feat_dim]).astype( shape=[self.num_classes, self.feat_dim]).astype("float64")
"float64") #random center
if init_center: if center_file_path is not None:
assert os.path.exists( assert os.path.exists(
center_file_path center_file_path
), f"center path({center_file_path}) must exist when init_center is set as True." ), f"center path({center_file_path}) must exist when it is not None."
with open(center_file_path, 'rb') as f: with open(center_file_path, 'rb') as f:
char_dict = pickle.load(f) char_dict = pickle.load(f)
for key in char_dict.keys(): for key in char_dict.keys():
...@@ -60,22 +57,23 @@ class CenterLoss(nn.Layer): ...@@ -60,22 +57,23 @@ class CenterLoss(nn.Layer):
batch_size = feats_reshape.shape[0] batch_size = feats_reshape.shape[0]
#calc feat * feat #calc l2 distance between feats and centers
dist1 = paddle.sum(paddle.square(feats_reshape), axis=1, keepdim=True) square_feat = paddle.sum(paddle.square(feats_reshape),
dist1 = paddle.expand(dist1, [batch_size, self.num_classes]) axis=1,
keepdim=True)
square_feat = paddle.expand(square_feat, [batch_size, self.num_classes])
#dist2 of centers square_center = paddle.sum(paddle.square(self.centers),
dist2 = paddle.sum(paddle.square(self.centers), axis=1, axis=1,
keepdim=True) #num_classes keepdim=True)
dist2 = paddle.expand(dist2, square_center = paddle.expand(
[self.num_classes, batch_size]).astype("float64") square_center, [self.num_classes, batch_size]).astype("float64")
dist2 = paddle.transpose(dist2, [1, 0]) square_center = paddle.transpose(square_center, [1, 0])
#first x * x + y * y distmat = paddle.add(square_feat, square_center)
distmat = paddle.add(dist1, dist2) feat_dot_center = paddle.matmul(feats_reshape,
tmp = paddle.matmul(feats_reshape, paddle.transpose(self.centers, [1, 0]))
paddle.transpose(self.centers, [1, 0])) distmat = distmat - 2.0 * feat_dot_center
distmat = distmat - 2.0 * tmp
#generate the mask #generate the mask
classes = paddle.arange(self.num_classes).astype("int64") classes = paddle.arange(self.num_classes).astype("int64")
...@@ -83,7 +81,8 @@ class CenterLoss(nn.Layer): ...@@ -83,7 +81,8 @@ class CenterLoss(nn.Layer):
paddle.unsqueeze(label, 1), (batch_size, self.num_classes)) paddle.unsqueeze(label, 1), (batch_size, self.num_classes))
mask = paddle.equal( mask = paddle.equal(
paddle.expand(classes, [batch_size, self.num_classes]), paddle.expand(classes, [batch_size, self.num_classes]),
label).astype("float64") #get mask label).astype("float64")
dist = paddle.multiply(distmat, mask) dist = paddle.multiply(distmat, mask)
loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
return {'loss_center': loss} return {'loss_center': loss}
...@@ -11,7 +11,10 @@ ...@@ -11,7 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -147,4 +150,4 @@ class BCELoss(nn.Layer): ...@@ -147,4 +150,4 @@ class BCELoss(nn.Layer):
def forward(self, input, label, mask=None, weight=None, name=None): def forward(self, input, label, mask=None, weight=None, name=None):
loss = F.binary_cross_entropy(input, label, reduction=self.reduction) loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
return loss return loss
\ No newline at end of file
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
This code is refer from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
import paddle import paddle
from paddle import nn from paddle import nn
......
...@@ -38,7 +38,7 @@ class CTCLoss(nn.Layer): ...@@ -38,7 +38,7 @@ class CTCLoss(nn.Layer):
if self.use_focal_loss: if self.use_focal_loss:
weight = paddle.exp(-loss) weight = paddle.exp(-loss)
weight = paddle.subtract(paddle.to_tensor([1.0]), weight) weight = paddle.subtract(paddle.to_tensor([1.0]), weight)
weight = paddle.square(weight) * self.focal_loss_alpha weight = paddle.square(weight)
loss = paddle.multiply(loss, weight) loss = paddle.multiply(loss, weight)
loss = loss.mean() # sum loss = loss.mean()
return {'loss': loss} return {'loss': loss}
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
from .ace_loss import ACELoss
from .center_loss import CenterLoss
from .rec_ctc_loss import CTCLoss
class EnhancedCTCLoss(nn.Layer):
    """CTC loss optionally augmented with ACE loss and/or center loss.

    The total loss is::

        loss = ctc_loss
             + center_loss_weight * center_loss   (if use_center_loss)
             + ace_loss_weight * ace_loss         (if use_ace_loss)

    Args:
        use_focal_loss (bool): forwarded to ``CTCLoss`` to enable focal
            re-weighting of the CTC term.
        use_ace_loss (bool): add an ACE loss term.
        ace_loss_weight (float): weight of the ACE term.
        use_center_loss (bool): add a center loss term.
        center_loss_weight (float): weight of the center loss term.
        num_classes (int): number of character classes (center loss).
        feat_dim (int): feature dimension used by center loss.
        init_center (bool): kept for backward compatibility of existing
            configs; no longer forwarded to ``CenterLoss``, which now
            initializes centers from ``center_file_path`` when that path
            is not ``None``.
        center_file_path (str | None): optional pickle file of
            pre-computed class centers for center loss.
        **kwargs: ignored; accepted for config-driven construction.
    """

    def __init__(self,
                 use_focal_loss=False,
                 use_ace_loss=False,
                 ace_loss_weight=0.1,
                 use_center_loss=False,
                 center_loss_weight=0.05,
                 num_classes=6625,
                 feat_dim=96,
                 init_center=False,
                 center_file_path=None,
                 **kwargs):
        super(EnhancedCTCLoss, self).__init__()
        self.ctc_loss_func = CTCLoss(use_focal_loss=use_focal_loss)

        self.use_ace_loss = bool(use_ace_loss)
        if use_ace_loss:
            self.ace_loss_func = ACELoss()
            self.ace_loss_weight = ace_loss_weight

        self.use_center_loss = bool(use_center_loss)
        if use_center_loss:
            # BUGFIX: CenterLoss dropped its `init_center` parameter
            # (centers are loaded whenever center_file_path is not None),
            # so it must no longer be passed — doing so raised TypeError.
            self.center_loss_func = CenterLoss(
                num_classes=num_classes,
                feat_dim=feat_dim,
                center_file_path=center_file_path)
            self.center_loss_weight = center_loss_weight

    def __call__(self, predicts, batch):
        """Compute the combined loss.

        Args:
            predicts: model output passed through to the component losses.
            batch: ground-truth batch passed through to the component losses.

        Returns:
            dict: ``{'enhanced_ctc_loss': loss}`` with the weighted sum.
        """
        loss = self.ctc_loss_func(predicts, batch)["loss"]
        if self.use_center_loss:
            center_loss = self.center_loss_func(
                predicts, batch)["loss_center"] * self.center_loss_weight
            loss = loss + center_loss
        if self.use_ace_loss:
            ace_loss = self.ace_loss_func(
                predicts, batch)["loss_ace"] * self.ace_loss_weight
            loss = loss + ace_loss
        return {'enhanced_ctc_loss': loss}
...@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer): ...@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer):
log_prb = F.log_softmax(pred, axis=1) log_prb = F.log_softmax(pred, axis=1)
non_pad_mask = paddle.not_equal( non_pad_mask = paddle.not_equal(
tgt, paddle.zeros( tgt, paddle.zeros(
tgt.shape, dtype='int64')) tgt.shape, dtype=tgt.dtype))
loss = -(one_hot * log_prb).sum(axis=1) loss = -(one_hot * log_prb).sum(axis=1)
loss = loss.masked_select(non_pad_mask).mean() loss = loss.masked_select(non_pad_mask).mean()
else: else:
......
...@@ -9,11 +9,14 @@ from paddle import nn ...@@ -9,11 +9,14 @@ from paddle import nn
class SARLoss(nn.Layer): class SARLoss(nn.Layer):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(SARLoss, self).__init__() super(SARLoss, self).__init__()
self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="mean", ignore_index=96) self.loss_func = paddle.nn.loss.CrossEntropyLoss(
reduction="mean", ignore_index=92)
def forward(self, predicts, batch): def forward(self, predicts, batch):
predict = predicts[:, :-1, :] # ignore last index of outputs to be in same seq_len with targets predict = predicts[:, :
label = batch[1].astype("int64")[:, 1:] # ignore first index of target in loss calculation -1, :] # ignore last index of outputs to be in same seq_len with targets
label = batch[1].astype(
"int64")[:, 1:] # ignore first index of target in loss calculation
batch_size, num_steps, num_classes = predict.shape[0], predict.shape[ batch_size, num_steps, num_classes = predict.shape[0], predict.shape[
1], predict.shape[2] 1], predict.shape[2]
assert len(label.shape) == len(list(predict.shape)) - 1, \ assert len(label.shape) == len(list(predict.shape)) - 1, \
......
...@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone ...@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
from ppocr.modeling.necks import build_neck from ppocr.modeling.necks import build_neck
from ppocr.modeling.heads import build_head from ppocr.modeling.heads import build_head
from .base_model import BaseModel from .base_model import BaseModel
from ppocr.utils.save_load import init_model, load_pretrained_params from ppocr.utils.save_load import load_pretrained_params
__all__ = ['DistillationModel'] __all__ = ['DistillationModel']
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,30 +12,23 @@ ...@@ -12,30 +12,23 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import KaimingNormal
import math import math
import numpy as np import numpy as np
import paddle import paddle
from paddle import ParamAttr, reshape, transpose, concat, split from paddle import ParamAttr, reshape, transpose
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import KaimingNormal from paddle.nn.initializer import KaimingNormal
import math
from paddle.nn.functional import hardswish, hardsigmoid
from paddle.regularizer import L2Decay from paddle.regularizer import L2Decay
from paddle.nn.functional import hardswish, hardsigmoid
class ConvBNLayer(nn.Layer): class ConvBNLayer(nn.Layer):
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1): ...@@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1):
kernel_size=3, kernel_size=3,
stride=stride, stride=stride,
padding=1, padding=1,
bias_attr=False bias_attr=False)
)
class BasicBlock(nn.Layer): class BasicBlock(nn.Layer):
expansion = 1 expansion = 1
def __init__(self, in_channels, channels, stride=1, downsample=False): def __init__(self, in_channels, channels, stride=1, downsample=False):
super().__init__() super().__init__()
self.conv1 = conv3x3(in_channels, channels, stride) self.conv1 = conv3x3(in_channels, channels, stride)
...@@ -34,9 +53,13 @@ class BasicBlock(nn.Layer): ...@@ -34,9 +53,13 @@ class BasicBlock(nn.Layer):
self.downsample = downsample self.downsample = downsample
if downsample: if downsample:
self.downsample = nn.Sequential( self.downsample = nn.Sequential(
nn.Conv2D(in_channels, channels * self.expansion, 1, stride, bias_attr=False), nn.Conv2D(
nn.BatchNorm2D(channels * self.expansion), in_channels,
) channels * self.expansion,
1,
stride,
bias_attr=False),
nn.BatchNorm2D(channels * self.expansion), )
else: else:
self.downsample = nn.Sequential() self.downsample = nn.Sequential()
self.stride = stride self.stride = stride
...@@ -57,7 +80,7 @@ class BasicBlock(nn.Layer): ...@@ -57,7 +80,7 @@ class BasicBlock(nn.Layer):
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet31(nn.Layer): class ResNet31(nn.Layer):
...@@ -69,12 +92,13 @@ class ResNet31(nn.Layer): ...@@ -69,12 +92,13 @@ class ResNet31(nn.Layer):
out_indices (None | Sequence[int]): Indices of output stages. out_indices (None | Sequence[int]): Indices of output stages.
last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage. last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
''' '''
def __init__(self,
in_channels=3, def __init__(self,
layers=[1, 2, 5, 3], in_channels=3,
channels=[64, 128, 256, 256, 512, 512, 512], layers=[1, 2, 5, 3],
out_indices=None, channels=[64, 128, 256, 256, 512, 512, 512],
last_stage_pool=False): out_indices=None,
last_stage_pool=False):
super(ResNet31, self).__init__() super(ResNet31, self).__init__()
assert isinstance(in_channels, int) assert isinstance(in_channels, int)
assert isinstance(last_stage_pool, bool) assert isinstance(last_stage_pool, bool)
...@@ -83,46 +107,56 @@ class ResNet31(nn.Layer): ...@@ -83,46 +107,56 @@ class ResNet31(nn.Layer):
self.last_stage_pool = last_stage_pool self.last_stage_pool = last_stage_pool
# conv 1 (Conv Conv) # conv 1 (Conv Conv)
self.conv1_1 = nn.Conv2D(in_channels, channels[0], kernel_size=3, stride=1, padding=1) self.conv1_1 = nn.Conv2D(
in_channels, channels[0], kernel_size=3, stride=1, padding=1)
self.bn1_1 = nn.BatchNorm2D(channels[0]) self.bn1_1 = nn.BatchNorm2D(channels[0])
self.relu1_1 = nn.ReLU() self.relu1_1 = nn.ReLU()
self.conv1_2 = nn.Conv2D(channels[0], channels[1], kernel_size=3, stride=1, padding=1) self.conv1_2 = nn.Conv2D(
channels[0], channels[1], kernel_size=3, stride=1, padding=1)
self.bn1_2 = nn.BatchNorm2D(channels[1]) self.bn1_2 = nn.BatchNorm2D(channels[1])
self.relu1_2 = nn.ReLU() self.relu1_2 = nn.ReLU()
# conv 2 (Max-pooling, Residual block, Conv) # conv 2 (Max-pooling, Residual block, Conv)
self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) self.pool2 = nn.MaxPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block2 = self._make_layer(channels[1], channels[2], layers[0]) self.block2 = self._make_layer(channels[1], channels[2], layers[0])
self.conv2 = nn.Conv2D(channels[2], channels[2], kernel_size=3, stride=1, padding=1) self.conv2 = nn.Conv2D(
channels[2], channels[2], kernel_size=3, stride=1, padding=1)
self.bn2 = nn.BatchNorm2D(channels[2]) self.bn2 = nn.BatchNorm2D(channels[2])
self.relu2 = nn.ReLU() self.relu2 = nn.ReLU()
# conv 3 (Max-pooling, Residual block, Conv) # conv 3 (Max-pooling, Residual block, Conv)
self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) self.pool3 = nn.MaxPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block3 = self._make_layer(channels[2], channels[3], layers[1]) self.block3 = self._make_layer(channels[2], channels[3], layers[1])
self.conv3 = nn.Conv2D(channels[3], channels[3], kernel_size=3, stride=1, padding=1) self.conv3 = nn.Conv2D(
channels[3], channels[3], kernel_size=3, stride=1, padding=1)
self.bn3 = nn.BatchNorm2D(channels[3]) self.bn3 = nn.BatchNorm2D(channels[3])
self.relu3 = nn.ReLU() self.relu3 = nn.ReLU()
# conv 4 (Max-pooling, Residual block, Conv) # conv 4 (Max-pooling, Residual block, Conv)
self.pool4 = nn.MaxPool2D(kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True) self.pool4 = nn.MaxPool2D(
kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
self.block4 = self._make_layer(channels[3], channels[4], layers[2]) self.block4 = self._make_layer(channels[3], channels[4], layers[2])
self.conv4 = nn.Conv2D(channels[4], channels[4], kernel_size=3, stride=1, padding=1) self.conv4 = nn.Conv2D(
channels[4], channels[4], kernel_size=3, stride=1, padding=1)
self.bn4 = nn.BatchNorm2D(channels[4]) self.bn4 = nn.BatchNorm2D(channels[4])
self.relu4 = nn.ReLU() self.relu4 = nn.ReLU()
# conv 5 ((Max-pooling), Residual block, Conv) # conv 5 ((Max-pooling), Residual block, Conv)
self.pool5 = None self.pool5 = None
if self.last_stage_pool: if self.last_stage_pool:
self.pool5 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) self.pool5 = nn.MaxPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block5 = self._make_layer(channels[4], channels[5], layers[3]) self.block5 = self._make_layer(channels[4], channels[5], layers[3])
self.conv5 = nn.Conv2D(channels[5], channels[5], kernel_size=3, stride=1, padding=1) self.conv5 = nn.Conv2D(
channels[5], channels[5], kernel_size=3, stride=1, padding=1)
self.bn5 = nn.BatchNorm2D(channels[5]) self.bn5 = nn.BatchNorm2D(channels[5])
self.relu5 = nn.ReLU() self.relu5 = nn.ReLU()
self.out_channels = channels[-1] self.out_channels = channels[-1]
def _make_layer(self, input_channels, output_channels, blocks): def _make_layer(self, input_channels, output_channels, blocks):
layers = [] layers = []
for _ in range(blocks): for _ in range(blocks):
...@@ -130,19 +164,19 @@ class ResNet31(nn.Layer): ...@@ -130,19 +164,19 @@ class ResNet31(nn.Layer):
if input_channels != output_channels: if input_channels != output_channels:
downsample = nn.Sequential( downsample = nn.Sequential(
nn.Conv2D( nn.Conv2D(
input_channels, input_channels,
output_channels, output_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
bias_attr=False), bias_attr=False),
nn.BatchNorm2D(output_channels), nn.BatchNorm2D(output_channels), )
)
layers.append(
layers.append(BasicBlock(input_channels, output_channels, downsample=downsample)) BasicBlock(
input_channels, output_channels, downsample=downsample))
input_channels = output_channels input_channels = output_channels
return nn.Sequential(*layers) return nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1_1(x) x = self.conv1_1(x)
x = self.bn1_1(x) x = self.bn1_1(x)
...@@ -166,11 +200,11 @@ class ResNet31(nn.Layer): ...@@ -166,11 +200,11 @@ class ResNet31(nn.Layer):
x = block_layer(x) x = block_layer(x)
x = conv_layer(x) x = conv_layer(x)
x = bn_layer(x) x = bn_layer(x)
x= relu_layer(x) x = relu_layer(x)
outs.append(x) outs.append(x)
if self.out_indices is not None: if self.out_indices is not None:
return tuple([outs[i] for i in self.out_indices]) return tuple([outs[i] for i in self.out_indices])
return x return x
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment