Commit b3d6785d authored by myhloli's avatar myhloli
Browse files

refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
parent 3cb156f5
......@@ -6,10 +6,9 @@ import numpy as np
from loguru import logger
from magic_pdf.libs.config_reader import get_device
from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import check_img, preprocess_image, sorted_boxes, \
merge_det_boxes, update_det_boxes, get_rotate_crop_image
from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.tools.infer.predict_system import TextSystem
import tools.infer.pytorchocr_utility as utility
from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
from .tools.infer.predict_system import TextSystem
from .tools.infer import pytorchocr_utility as utility
import argparse
......@@ -20,14 +19,9 @@ class PytorchPaddleOCR(TextSystem):
self.lang = kwargs.get('lang', 'ch')
# kwargs['cls_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_mobile_v2.0_cls_infer.pth"
if self.lang == 'ch':
kwargs['det_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_det_infer.pth"
kwargs['rec_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_rec_infer.pth"
kwargs['det_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml"
kwargs['rec_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml"
kwargs['rec_image_shape'] = '3,48,320'
kwargs['device'] = get_device()
......
import os, sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from collections import OrderedDict
import numpy as np
import cv2
import os
import torch
from pytorchocr.modeling.architectures.base_model import BaseModel
from .modeling.architectures.base_model import BaseModel
class BaseOCRV20:
def __init__(self, config, **kwargs):
......@@ -17,45 +12,6 @@ class BaseOCRV20:
def build_net(self, **kwargs):
self.net = BaseModel(self.config, **kwargs)
def load_paddle_weights(self, weights_path):
raise NotImplementedError('implemented in converter.')
print('paddle weights loading...')
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
for k,v in self.net.state_dict().items():
name = k
if name.endswith('num_batches_tracked'):
continue
if name.endswith('running_mean'):
ppname = name.replace('running_mean', '_mean')
elif name.endswith('running_var'):
ppname = name.replace('running_var', '_variance')
elif name.endswith('bias') or name.endswith('weight'):
ppname = name
elif 'lstm' in name:
ppname = name
else:
print('Redundance:')
print(name)
raise ValueError
try:
if ppname.endswith('fc.weight'):
self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname].T))
else:
self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname]))
except Exception as e:
print('pytorch: {}, {}'.format(k, v.size()))
print('paddle: {}, {}'.format(ppname, para_state_dict[ppname].shape))
raise e
print('model is loaded: {}'.format(weights_path))
def read_pytorch_weights(self, weights_path):
if not os.path.exists(weights_path):
raise FileNotFoundError('{} is not existed.'.format(weights_path))
......@@ -74,38 +30,9 @@ class BaseOCRV20:
print('weights is loaded.')
def load_pytorch_weights(self, weights_path):
self.net.load_state_dict(torch.load(weights_path))
self.net.load_state_dict(torch.load(weights_path, weights_only=True))
print('model is loaded: {}'.format(weights_path))
def save_pytorch_weights(self, weights_path):
try:
torch.save(self.net.state_dict(), weights_path, _use_new_zipfile_serialization=False)
except:
torch.save(self.net.state_dict(), weights_path) # _use_new_zipfile_serialization=False for torch>=1.6.0
print('model is saved: {}'.format(weights_path))
def print_pytorch_state_dict(self):
print('pytorch:')
for k,v in self.net.state_dict().items():
print('{}----{}'.format(k,type(v)))
def read_paddle_weights(self, weights_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
return para_state_dict, opti_state_dict
def print_paddle_state_dict(self, weights_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
print('paddle"')
for k,v in para_state_dict.items():
print('{}----{}'.format(k,type(v)))
def inference(self, inputs):
with torch.no_grad():
infer = self.net(inputs)
......
......@@ -18,7 +18,6 @@ import copy
# from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler
# import paddle.distributed as dist
from pytorchocr.data.imaug import transform, create_operators
# from pytorchocr.data.simple_dataset import SimpleDataSet
# from pytorchocr.data.lmdb_dataset import LMDBDateSet
from .imaug import transform, create_operators
......@@ -15,7 +15,7 @@ from .operators import *
# from .east_process import *
# from .sast_process import *
from .gen_table_mask import *
# from .gen_table_mask import *
def transform(data, ops=None):
""" transform """
......
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import six
import cv2
import numpy as np
class GenTableMask(object):
    """Generate a mask of the text regions inside table cells.

    For every cell that carries a ``bbox``, the cell crop is split into
    horizontal text bands by projection profiles, each band is shrunk a
    little, and the resulting rectangles are painted into a mask image.

    Args:
        shrink_h_max: upper bound (pixels) for the vertical shrink applied
            to each rectangle.
        shrink_w_max: upper bound (pixels) for the horizontal shrink applied
            to each rectangle.
        mask_type: ``1`` produces a single-channel mask stored under
            ``data['mask_img']``; any other value produces a 3-channel mask
            that replaces ``data['image']``.
    """

    def __init__(self, shrink_h_max, shrink_w_max, mask_type=0, **kwargs):
        # Honour the configured caps. They used to be accepted and then
        # silently overwritten with a hard-coded 5.
        self.shrink_h_max = shrink_h_max
        self.shrink_w_max = shrink_w_max
        self.mask_type = mask_type

    def projection(self, erosion, h, w, spilt_threshold=0):
        """Split a binary image into bands along its first axis.

        Args:
            erosion: 2-D array whose foreground pixels equal 255.
            h: number of rows to scan (expected to match ``erosion.shape[0]``).
            w: number of columns to scan (expected to match
                ``erosion.shape[1]``).
            spilt_threshold: a row counts as text when it holds more than
                this many foreground pixels.

        Returns:
            ``(box_list, projection_map)`` where ``box_list`` holds
            ``(start, end)`` row index pairs and ``projection_map`` is an
            array shaped like ``erosion`` with the projection histogram
            drawn as zeros on a background of ones.
        """
        # Horizontal projection: foreground pixel count of every row.
        row_counts = np.count_nonzero(erosion[:h, :w] == 255, axis=1).tolist()

        # Walk the profile and record where text bands start and stop.
        box_list = []
        start_idx = 0      # row where the current text band began
        in_text = False    # True while the scan is inside a text band
        for i, count in enumerate(row_counts):
            if not in_text and count > spilt_threshold:
                # Entered a text band.
                in_text = True
                start_idx = i
            elif in_text and count <= spilt_threshold:
                # Entered a blank band.
                in_text = False
                # Bands of two rows or fewer are dropped.
                if i - start_idx <= 2:
                    continue
                box_list.append((start_idx, i + 1))
        if in_text:
            # The text band runs to the last row.
            box_list.append((start_idx, h - 1))

        # Draw the projection histogram.
        projection_map = np.ones_like(erosion)
        for j, count in enumerate(row_counts):
            projection_map[j, :count] = 0
        return box_list, projection_map

    def projection_cx(self, box_img):
        """Split one cell crop into per-line bounding boxes.

        Args:
            box_img: BGR crop of a table cell.

        Returns:
            A list of ``[w_start, h_start, w_end, h_end]`` boxes in the
            coordinate system of ``box_img``. When at most one text band is
            found the whole crop ``[0, 0, w, h]`` is returned.
        """
        box_gray_img = cv2.cvtColor(box_img, cv2.COLOR_BGR2GRAY)
        h, w = box_gray_img.shape
        # Binarise the grayscale crop (inverted: pixels <= 200 become 255).
        _, thresh1 = cv2.threshold(box_gray_img, 200, 255, cv2.THRESH_BINARY_INV)
        # Vertical erosion, only applied when the crop is wider than tall.
        if h < w:
            kernel = np.ones((2, 1), np.uint8)
            erode = cv2.erode(thresh1, kernel, iterations=1)
        else:
            erode = thresh1
        # Horizontal dilation.
        kernel = np.ones((1, 5), np.uint8)
        erosion = cv2.dilate(erode, kernel, iterations=1)

        # Row bands of the crop (shared implementation with projection()).
        box_list, _ = self.projection(erosion, h, w)
        if len(box_list) <= 1:
            return [[0, 0, w, h]]

        split_bbox_list = []
        last = len(box_list) - 1
        for i, (h_start, h_end) in enumerate(box_list):
            # Stretch the first band to the top edge and the last band to
            # the bottom edge. The original test `i == len(box_list)` could
            # never be true inside enumerate().
            if i == 0:
                h_start = 0
            if i == last:
                h_end = h
            word_img = erosion[h_start:h_end + 1, :]
            word_h, word_w = word_img.shape
            # Project the transposed band to find its horizontal extent.
            w_split_list, _ = self.projection(word_img.T, word_w, word_h)
            if w_split_list:
                w_start, w_end = w_split_list[0][0], w_split_list[-1][1]
            else:
                # No column run survived the minimum-length filter; fall
                # back to the full band width instead of raising IndexError.
                w_start, w_end = 0, word_w
            if h_start > 0:
                h_start -= 1
            h_end += 1
            split_bbox_list.append([w_start, h_start, w_end, h_end])
        return split_bbox_list

    def shrink_bbox(self, bbox):
        """Shrink ``[left, top, right, bottom]`` by about 10% per side.

        The shrink per side is at least 1 pixel and at most
        ``shrink_w_max`` / ``shrink_h_max``. An axis is left untouched when
        shrinking would make it empty or inverted.
        """
        left, top, right, bottom = bbox
        sh_h = min(max(int((bottom - top) * 0.1), 1), self.shrink_h_max)
        sh_w = min(max(int((right - left) * 0.1), 1), self.shrink_w_max)
        left_new = left + sh_w
        right_new = right - sh_w
        top_new = top + sh_h
        bottom_new = bottom - sh_h
        if left_new >= right_new:
            left_new = left
            right_new = right
        if top_new >= bottom_new:
            top_new = top
            bottom_new = bottom
        return [left_new, top_new, right_new, bottom_new]

    def __call__(self, data):
        """Paint the text regions of ``data['cells']`` into a mask.

        Reads ``data['image']`` and ``data['cells']``. The mask is written
        to ``data['mask_img']`` (``mask_type == 1``) or replaces
        ``data['image']`` (otherwise), but only once at least one region has
        been painted. Returns ``data``.
        """
        img = data['image']
        cells = data['cells']
        height, width = img.shape[0:2]
        if self.mask_type == 1:
            mask_img = np.zeros((height, width), dtype=np.float32)
        else:
            mask_img = np.zeros((height, width, 3), dtype=np.float32)
        for cell in cells:
            if "bbox" not in cell:
                continue
            left, top, right, bottom = cell['bbox']
            box_img = img[top:bottom, left:right, :].copy()
            for w_start, h_start, w_end, h_end in self.projection_cx(box_img):
                # Move the box from crop coordinates to image coordinates,
                # then shrink it before painting.
                x0, y0, x1, y1 = self.shrink_bbox(
                    [w_start + left, h_start + top, w_end + left, h_end + top])
                if self.mask_type == 1:
                    mask_img[y0:y1, x0:x1] = 1.0
                    data['mask_img'] = mask_img
                else:
                    mask_img[y0:y1, x0:x1, :] = (255, 255, 255)
                    data['image'] = mask_img
        return data
class ResizeTableImage(object):
    """Resize a table image so its longer side equals ``max_len``.

    Cell bounding boxes found in ``data['cells']`` are scaled by the same
    ratio, and ``max_len`` is recorded in ``data['max_len']``.
    """

    def __init__(self, max_len, **kwargs):
        super(ResizeTableImage, self).__init__()
        self.max_len = max_len

    def get_img_bbox(self, cells):
        """Return the ``bbox`` entry of every cell that has one, in order."""
        return [cell['bbox'] for cell in cells if "bbox" in cell]

    def resize_img_table(self, img, bbox_list, max_len):
        """Resize ``img`` and scale ``bbox_list`` by the same ratio.

        Returns ``(img_new, bbox_list_new)``; scaled coordinates are
        truncated to ints.
        """
        height, width = img.shape[0:2]
        ratio = max_len / float(max(height, width))
        target_size = (int(width * ratio), int(height * ratio))
        img_new = cv2.resize(img, target_size)
        bbox_list_new = []
        for box in bbox_list:
            left, top, right, bottom = box.copy()
            bbox_list_new.append([
                int(left * ratio),
                int(top * ratio),
                int(right * ratio),
                int(bottom * ratio),
            ])
        return img_new, bbox_list_new

    def __call__(self, data):
        """Resize ``data['image']`` and rescale the cell boxes in place."""
        cells = data['cells'] if 'cells' in data else []
        img_new, bbox_list_new = self.resize_img_table(
            data['image'], self.get_img_bbox(cells), self.max_len)
        data['image'] = img_new
        # Hand the scaled boxes back to the cells that supplied them, in the
        # same order they were collected.
        scaled = iter(bbox_list_new)
        for cell in cells:
            if "bbox" in cell:
                cell['bbox'] = next(scaled)
        data['max_len'] = self.max_len
        return data
class PaddingTableImage(object):
    """Place the image in the top-left corner of a square zero canvas.

    The canvas is ``data['max_len']`` pixels on each side, has 3 channels
    and dtype float32; it replaces ``data['image']``.
    """

    def __init__(self, **kwargs):
        super(PaddingTableImage, self).__init__()

    def __call__(self, data):
        """Pad ``data['image']`` to ``max_len x max_len`` and return ``data``."""
        image = data['image']
        side = data['max_len']
        canvas = np.zeros((side, side, 3), dtype=np.float32)
        rows, cols = image.shape[0:2]
        canvas[:rows, :cols, :] = image.copy()
        data['image'] = canvas
        return data
\ No newline at end of file
ch_ptocr_mobile_v2.0_cls_infer:
model_type: cls
algorithm: CLS
Transform:
Backbone:
name: MobileNetV3
scale: 0.35
model_name: small
Neck:
Head:
name: ClsHead
class_dim: 2
Multilingual_PP-OCRv3_det_infer:
model_type: det
algorithm: DB
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
disable_se: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
en_PP-OCRv3_det_infer:
model_type: det
algorithm: DB
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
disable_se: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
en_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR_LCNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 97 #'blank' + ...(62) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
ch_PP-OCRv4_det_infer:
model_type: det
algorithm: DB
Transform: null
Backbone:
name: PPLCNetV3
scale: 0.75
det: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
ch_PP-OCRv4_det_server_infer:
model_type: det
algorithm: DB
Transform: null
Backbone:
name: PPHGNet_small
det: True
Neck:
name: LKPAN
out_channels: 256
intracl: true
Head:
name: PFHeadLocal
k: 50
mode: "large"
ch_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR_LCNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
ch_PP-OCRv4_rec_server_infer:
model_type: rec
algorithm: SVTR_HGNet
Transform:
Backbone:
name: PPHGNet_small
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
chinese_cht_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [1, 2]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 8423
fc_decay: 0.00001
latin_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 187
fc_decay: 0.00001
cyrillic_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 165
fc_decay: 0.00001
arabic_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 164
fc_decay: 0.00001
korean_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 3690
fc_decay: 0.00001
japan_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 4401
fc_decay: 0.00001
ta_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 130
fc_decay: 0.00001
te_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 153
fc_decay: 0.00001
ka_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 155
fc_decay: 0.00001
devanagari_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 169
fc_decay: 0.00001
......@@ -14,7 +14,7 @@
import copy
__all__ = ['build_model']
__all__ = ["build_model"]
def build_model(config, **kwargs):
......@@ -22,4 +22,4 @@ def build_model(config, **kwargs):
config = copy.deepcopy(config)
module_class = BaseModel(config, **kwargs)
return module_class
\ No newline at end of file
return module_class
import os, sys
# import torch
import torch.nn as nn
# import torch.nn.functional as F
# from pytorchocr.modeling.common import Activation
from torch import nn
from ..backbones import build_backbone
from ..heads import build_head
from ..necks import build_neck
from pytorchocr.modeling.transforms import build_transform
from pytorchocr.modeling.backbones import build_backbone
from pytorchocr.modeling.necks import build_neck
from pytorchocr.modeling.heads import build_head
class BaseModel(nn.Module):
def __init__(self, config, **kwargs):
......@@ -18,27 +14,14 @@ class BaseModel(nn.Module):
"""
super(BaseModel, self).__init__()
in_channels = config.get('in_channels', 3)
model_type = config['model_type']
# build transform,
# for rec, transform can be TPS or None
# for det and cls, transform should be None,
# if you build the model differently, you can use a transform in det and cls
if 'Transform' not in config or config['Transform'] is None:
self.use_transform = False
else:
self.use_transform = True
config['Transform']['in_channels'] = in_channels
self.transform = build_transform(config['Transform'])
in_channels = self.transform.out_channels
# raise NotImplementedError
in_channels = config.get("in_channels", 3)
model_type = config["model_type"]
# build backbone; a backbone is needed for det, rec and cls
if 'Backbone' not in config or config['Backbone'] is None:
if "Backbone" not in config or config["Backbone"] is None:
self.use_backbone = False
else:
self.use_backbone = True
config["Backbone"]['in_channels'] = in_channels
config["Backbone"]["in_channels"] = in_channels
self.backbone = build_backbone(config["Backbone"], model_type)
in_channels = self.backbone.out_channels
......@@ -46,20 +29,20 @@ class BaseModel(nn.Module):
# for rec, neck can be cnn,rnn or reshape(None)
# for det, neck can be FPN, BIFPN and so on.
# for cls, neck should be none
if 'Neck' not in config or config['Neck'] is None:
if "Neck" not in config or config["Neck"] is None:
self.use_neck = False
else:
self.use_neck = True
config['Neck']['in_channels'] = in_channels
self.neck = build_neck(config['Neck'])
config["Neck"]["in_channels"] = in_channels
self.neck = build_neck(config["Neck"])
in_channels = self.neck.out_channels
# build head; a head is needed for det, rec and cls
if 'Head' not in config or config['Head'] is None:
if "Head" not in config or config["Head"] is None:
self.use_head = False
else:
self.use_head = True
config["Head"]['in_channels'] = in_channels
config["Head"]["in_channels"] = in_channels
self.head = build_head(config["Head"], **kwargs)
self.return_all_feats = config.get("return_all_feats", False)
......@@ -70,7 +53,7 @@ class BaseModel(nn.Module):
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
nn.init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
......@@ -81,15 +64,12 @@ class BaseModel(nn.Module):
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
nn.init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
nn.init.zeros_(m.bias)
def forward(self, x):
y = dict()
if self.use_transform:
x = self.transform(x)
if self.use_backbone:
x = self.backbone(x)
if isinstance(x, dict):
......@@ -107,9 +87,9 @@ class BaseModel(nn.Module):
if self.use_head:
x = self.head(x)
# for multi head, save ctc neck out for udml
if isinstance(x, dict) and 'ctc_nect' in x.keys():
y['neck_out'] = x['ctc_neck']
y['head_out'] = x
if isinstance(x, dict) and "ctc_nect" in x.keys():
y["neck_out"] = x["ctc_neck"]
y["head_out"] = x
elif isinstance(x, dict):
y.update(x)
else:
......@@ -122,4 +102,4 @@ class BaseModel(nn.Module):
else:
return {final_name: x}
else:
return x
\ No newline at end of file
return x
......@@ -12,45 +12,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['build_backbone']
__all__ = ["build_backbone"]
def build_backbone(config, model_type):
if model_type == 'det':
if model_type == "det":
from .det_mobilenet_v3 import MobileNetV3
from .det_resnet import ResNet
from .det_resnet_vd import ResNet_vd
from .det_resnet_vd_sast import ResNet_SAST
from .rec_hgnet import PPHGNet_small
from .rec_lcnetv3 import PPLCNetV3
support_dict = [
"MobileNetV3",
"ResNet",
"ResNet_vd",
"ResNet_SAST",
"PPLCNetV3",
"PPHGNet_small",
]
elif model_type == "rec" or model_type == "cls":
from .rec_hgnet import PPHGNet_small
support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small']
elif model_type == 'rec' or model_type == 'cls':
from .rec_lcnetv3 import PPLCNetV3
from .rec_mobilenet_v3 import MobileNetV3
from .rec_resnet_vd import ResNet
from .rec_resnet_fpn import ResNetFPN
from .rec_mv1_enhance import MobileNetV1Enhance
from .rec_nrtr_mtb import MTB
from .rec_resnet_31 import ResNet31
from .rec_svtrnet import SVTRNet
from .rec_vitstr import ViTSTR
from .rec_densenet import DenseNet
from .rec_lcnetv3 import PPLCNetV3
from .rec_hgnet import PPHGNet_small
support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small']
elif model_type == 'e2e':
from .e2e_resnet_vd_pg import ResNet
support_dict = ['ResNet']
elif model_type == "table":
from .table_resnet_vd import ResNet
from .table_mobilenet_v3 import MobileNetV3
support_dict = ["ResNet", "MobileNetV3"]
from .rec_mv1_enhance import MobileNetV1Enhance
support_dict = [
"MobileNetV1Enhance",
"MobileNetV3",
"ResNet",
"ResNetFPN",
"MTB",
"ResNet31",
"SVTRNet",
"ViTSTR",
"DenseNet",
"PPLCNetV3",
"PPHGNet_small",
]
else:
raise NotImplementedError
module_name = config.pop('name')
module_name = config.pop("name")
assert module_name in support_dict, Exception(
'when model typs is {}, backbone only support {}'.format(model_type,
support_dict))
"when model typs is {}, backbone only support {}".format(
model_type, support_dict
)
)
module_class = eval(module_name)(**config)
return module_class
\ No newline at end of file
return module_class
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
from torch import nn
from ..common import Activation
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
......@@ -14,16 +13,18 @@ def make_divisible(v, divisor=8, min_value=None):
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
if_act=True,
act=None,
name=None):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
if_act=True,
act=None,
name=None,
):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.conv = nn.Conv2d(
......@@ -33,11 +34,12 @@ class ConvBNLayer(nn.Module):
stride=stride,
padding=padding,
groups=groups,
bias=False)
bias=False,
)
self.bn = nn.BatchNorm2d(
out_channels,
)
)
if self.if_act:
self.act = Activation(act_type=act, inplace=True)
......@@ -59,16 +61,18 @@ class SEModule(nn.Module):
kernel_size=1,
stride=1,
padding=0,
bias=True)
self.relu1 = Activation(act_type='relu', inplace=True)
bias=True,
)
self.relu1 = Activation(act_type="relu", inplace=True)
self.conv2 = nn.Conv2d(
in_channels=in_channels // reduction,
out_channels=in_channels,
kernel_size=1,
stride=1,
padding=0,
bias=True)
self.hard_sigmoid = Activation(act_type='hard_sigmoid', inplace=True)
bias=True,
)
self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True)
def forward(self, inputs):
outputs = self.avg_pool(inputs)
......@@ -81,15 +85,17 @@ class SEModule(nn.Module):
class ResidualUnit(nn.Module):
def __init__(self,
in_channels,
mid_channels,
out_channels,
kernel_size,
stride,
use_se,
act=None,
name=''):
def __init__(
self,
in_channels,
mid_channels,
out_channels,
kernel_size,
stride,
use_se,
act=None,
name="",
):
super(ResidualUnit, self).__init__()
self.if_shortcut = stride == 1 and in_channels == out_channels
self.if_se = use_se
......@@ -102,7 +108,8 @@ class ResidualUnit(nn.Module):
padding=0,
if_act=True,
act=act,
name=name + "_expand")
name=name + "_expand",
)
self.bottleneck_conv = ConvBNLayer(
in_channels=mid_channels,
out_channels=mid_channels,
......@@ -112,7 +119,8 @@ class ResidualUnit(nn.Module):
groups=mid_channels,
if_act=True,
act=act,
name=name + "_depthwise")
name=name + "_depthwise",
)
if self.if_se:
self.mid_se = SEModule(mid_channels, name=name + "_se")
self.linear_conv = ConvBNLayer(
......@@ -123,7 +131,8 @@ class ResidualUnit(nn.Module):
padding=0,
if_act=False,
act=None,
name=name + "_linear")
name=name + "_linear",
)
def forward(self, inputs):
x = self.expand_conv(inputs)
......@@ -137,12 +146,9 @@ class ResidualUnit(nn.Module):
class MobileNetV3(nn.Module):
def __init__(self,
in_channels=3,
model_name='large',
scale=0.5,
disable_se=False,
**kwargs):
def __init__(
self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs
):
"""
the MobilenetV3 backbone network for detection module.
Args:
......@@ -155,46 +161,48 @@ class MobileNetV3(nn.Module):
if model_name == "large":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', 2],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hard_swish', 2],
[3, 200, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 480, 112, True, 'hard_swish', 1],
[3, 672, 112, True, 'hard_swish', 1],
[5, 672, 160, True, 'hard_swish', 2],
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1],
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1],
[3, 240, 80, False, "hard_swish", 2],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish", 1],
[5, 672, 160, True, "hard_swish", 2],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish", 1],
]
cls_ch_squeeze = 960
elif model_name == "small":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hard_swish', 2],
[5, 240, 40, True, 'hard_swish', 1],
[5, 240, 40, True, 'hard_swish', 1],
[5, 120, 48, True, 'hard_swish', 1],
[5, 144, 48, True, 'hard_swish', 1],
[5, 288, 96, True, 'hard_swish', 2],
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hard_swish", 2],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1],
[5, 288, 96, True, "hard_swish", 2],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1],
]
cls_ch_squeeze = 576
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
raise NotImplementedError(
"mode[" + model_name + "_model] is not implemented!"
)
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert scale in supported_scale, \
"supported scale are {} but input scale is {}".format(supported_scale, scale)
assert (
scale in supported_scale
), "supported scale are {} but input scale is {}".format(supported_scale, scale)
inplanes = 16
# conv1
self.conv = ConvBNLayer(
......@@ -205,15 +213,16 @@ class MobileNetV3(nn.Module):
padding=1,
groups=1,
if_act=True,
act='hard_swish',
name='conv1')
act="hard_swish",
name="conv1",
)
self.stages = nn.ModuleList()
self.out_channels = []
block_list = []
i = 0
inplanes = make_divisible(inplanes * scale)
for (k, exp, c, se, nl, s) in cfg:
for k, exp, c, se, nl, s in cfg:
se = se and not self.disable_se
if s == 2 and i > 2:
self.out_channels.append(inplanes)
......@@ -228,7 +237,9 @@ class MobileNetV3(nn.Module):
stride=s,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
name="conv" + str(i + 2),
)
)
inplanes = make_divisible(scale * c)
i += 1
block_list.append(
......@@ -240,8 +251,10 @@ class MobileNetV3(nn.Module):
padding=0,
groups=1,
if_act=True,
act='hard_swish',
name='conv_last'))
act="hard_swish",
name="conv_last",
)
)
self.stages.append(nn.Sequential(*block_list))
self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
# for i, stage in enumerate(self.stages):
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from .det_resnet_vd import DeformableConvV2, ConvBNLayer
class BottleneckBlock(nn.Module):
    """Residual block built from three ``ConvBNLayer`` stages.

    The stages use kernel sizes 1, 3 and 1; the last one outputs
    ``num_filters * 4`` channels. When ``shortcut`` is False an extra
    kernel-size-1 ``ConvBNLayer`` projects the input for the residual sum.

    Args:
        num_channels: channels of the block input.
        num_filters: channels of the first two stages.
        stride: stride of the middle stage and of the projection branch.
        shortcut: add the input unchanged when True, otherwise add its
            projection.
        is_dcn: forwarded to the middle ``ConvBNLayer``.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 is_dcn=False):
        super().__init__()
        expanded = num_filters * 4
        self.conv0 = ConvBNLayer(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=1,
            act="relu",
        )
        self.conv1 = ConvBNLayer(
            in_channels=num_filters,
            out_channels=num_filters,
            kernel_size=3,
            stride=stride,
            act="relu",
            is_dcn=is_dcn,
        )
        self.conv2 = ConvBNLayer(
            in_channels=num_filters,
            out_channels=expanded,
            kernel_size=1,
            act=None,
        )

        # The projection branch only exists when the input cannot be added
        # as-is.
        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=num_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=stride,
            )

        self.shortcut = shortcut
        self._num_channels_out = expanded

    def forward(self, inputs):
        """Return ``relu(skip + conv2(conv1(conv0(inputs))))``."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class BasicBlock(nn.Module):
    """Residual block built from two kernel-size-3 ``ConvBNLayer`` stages.

    When ``shortcut`` is False an extra kernel-size-1 ``ConvBNLayer``
    projects the input for the residual sum.

    Args:
        num_channels: channels of the block input.
        num_filters: channels produced by both stages.
        stride: stride of the first stage and of the projection branch.
        shortcut: add the input unchanged when True, otherwise add its
            projection.
        name: accepted but not used by this block.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 name=None):
        super().__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=3,
            stride=stride,
            act="relu",
        )
        self.conv1 = ConvBNLayer(
            in_channels=num_filters,
            out_channels=num_filters,
            kernel_size=3,
            act=None,
        )

        # The projection branch only exists when the input cannot be added
        # as-is.
        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=num_channels,
                out_channels=num_filters,
                kernel_size=1,
                stride=stride,
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip + conv1(conv0(inputs)))``."""
        residual = self.conv1(self.conv0(inputs))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class ResNet(nn.Module):
    """ResNet backbone built from ``BasicBlock`` (18/34) or ``BottleneckBlock`` (50+).

    Args:
        in_channels: channel count of the input image.
        layers: network depth; one of 18, 34, 50, 101, 152.
        out_indices: indices of the stages whose outputs are returned by
            ``forward``; defaults to all four stages.
        dcn_stage: per-stage flags forwarded as ``is_dcn`` to the bottleneck
            blocks; only consulted when ``layers >= 50``. Defaults to all False.

    Attributes:
        out_channels: channel count of every stage listed in ``out_indices``,
            in stage order.
    """

    def __init__(self, in_channels=3, layers=50, out_indices=None, dcn_stage=None):
        super().__init__()
        self.layers = layers
        self.input_image_channel = in_channels

        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Number of residual units per stage for each supported depth.
        depth = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }[layers]

        use_bottleneck = layers >= 50
        expansion = 4 if use_bottleneck else 1
        num_channels = [64, 256, 512, 1024] if use_bottleneck else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.dcn_stage = [False, False, False, False] if dcn_stage is None else dcn_stage
        self.out_indices = [0, 1, 2, 3] if out_indices is None else out_indices

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        self.conv = ConvBNLayer(
            in_channels=self.input_image_channel,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act="relu",
        )
        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stages = nn.ModuleList()
        self.out_channels = []

        for block, unit_count in enumerate(depth):
            stage = nn.Sequential()
            stage_width = num_filters[block] * expansion
            if use_bottleneck:
                is_dcn = self.dcn_stage[block]

            for i in range(unit_count):
                # Sub-module names: the third stage of the 101/152 variants
                # uses "a", "b1", "b2", ...; everything else uses letters.
                if layers in [101, 152] and block == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block + 2) + suffix

                unit_in = num_channels[block] if i == 0 else stage_width
                # Only the first unit of stages 1..3 downsamples.
                unit_stride = 2 if i == 0 and block != 0 else 1
                # The first unit of each stage needs a projection shortcut.
                identity_skip = i != 0

                if use_bottleneck:
                    unit = BottleneckBlock(
                        num_channels=unit_in,
                        num_filters=num_filters[block],
                        stride=unit_stride,
                        shortcut=identity_skip,
                        is_dcn=is_dcn,
                    )
                else:
                    unit = BasicBlock(
                        num_channels=unit_in,
                        num_filters=num_filters[block],
                        stride=unit_stride,
                        shortcut=identity_skip,
                    )
                stage.add_module(conv_name, unit)

            if block in self.out_indices:
                self.out_channels.append(stage_width)
            self.stages.append(stage)

    def forward(self, inputs):
        """Run the stem and all stages; return the outputs of the selected stages."""
        y = self.pool2d_max(self.conv(inputs))
        selected = []
        for idx, stage in enumerate(self.stages):
            y = stage(y)
            if idx in self.out_indices:
                selected.append(y)
        return selected
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
import torchvision
class DeformableConvV2(nn.Module):
    """Modulated deformable convolution built on ``torchvision.ops.DeformConv2d``.

    A regular convolution (``conv_offset``) predicts, from the input itself,
    the sampling offsets and the modulation mask consumed by ``conv_dcn``.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count of the output.
        kernel_size: square kernel size (an int).
        stride: stride shared by the offset and deformable convolutions.
        padding: accepted for signature compatibility; the padding actually
            used is derived from ``kernel_size`` (and ``dilation``).
        dilation: dilation of the deformable convolution.
        groups: number of offset/mask groups predicted by ``conv_offset``.
        bias_attr: truthiness decides whether ``conv_dcn`` has a bias.
        skip_quant: if true, sets ``conv_offset.skip_quant = True``.
        weight_attr, lr_scale, regularizer, dcn_bias_regularizer,
        dcn_bias_lr_scale: accepted for signature compatibility; unused here.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None,
                 lr_scale=1,
                 regularizer=None,
                 skip_quant=False,
                 dcn_bias_regularizer=None,
                 dcn_bias_lr_scale=2.):
        super().__init__()
        kernel_area = kernel_size ** 2
        same_pad = (kernel_size - 1) // 2

        # Channel split of the predicted tensor: (dx, dy) pairs, then masks.
        self.offset_channel = 2 * kernel_area * groups
        self.mask_channel = kernel_area * groups

        # A bias is only created when the caller explicitly asks for one.
        dcn_bias_attr = bool(bias_attr)

        self.conv_dcn = torchvision.ops.DeformConv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=same_pad * dilation,
            dilation=dilation,
            groups=groups // 2 if groups > 1 else 1,
            bias=dcn_bias_attr,
        )
        self.conv_offset = nn.Conv2d(
            in_channels,
            groups * 3 * kernel_area,
            kernel_size,
            stride=stride,
            padding=same_pad,
            bias=True,
        )
        if skip_quant:
            self.conv_offset.skip_quant = True

    def forward(self, x):
        """Predict offsets and mask from ``x`` and apply the deformable convolution."""
        sections = [self.offset_channel, self.mask_channel]
        offset, raw_mask = torch.split(self.conv_offset(x), sections, dim=1)
        return self.conv_dcn(x, offset, mask=torch.sigmoid(raw_mask))
class ConvBNLayer(nn.Module):
    """Convolution (regular or deformable) + batch norm + optional activation.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count of the output.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: groups of the regular convolution.
        dcn_groups: ``groups`` passed to ``DeformableConvV2`` when ``is_dcn``.
        is_vd_mode: if true, the input is first reduced by a 2x2 stride-2
            average pool (``ceil_mode=True``).
        act: activation type handed to ``Activation``; ``None`` disables it.
        name: accepted for signature compatibility; unused here.
        is_dcn: if true, use ``DeformableConvV2`` instead of ``nn.Conv2d``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 dcn_groups=1,
                 is_vd_mode=False,
                 act=None,
                 name=None,
                 is_dcn=False):
        super().__init__()
        self.is_vd_mode = is_vd_mode
        self.act = act
        self._pool2d_avg = nn.AvgPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)

        same_pad = (kernel_size - 1) // 2
        if is_dcn:
            self._conv = DeformableConvV2(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=same_pad,
                groups=dcn_groups,
                bias_attr=False,
            )
        else:
            self._conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=same_pad,
                groups=groups,
                bias=False,
            )

        self._batch_norm = nn.BatchNorm2d(out_channels, track_running_stats=True)

        # The activation module is only created when one was requested.
        if act is not None:
            self._act = Activation(act_type=act, inplace=True)

    def forward(self, inputs):
        """Apply (optional pool) -> conv -> batch norm -> (optional activation)."""
        x = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
        y = self._batch_norm(self._conv(x))
        return y if self.act is None else self._act(y)
class BottleneckBlock(nn.Module):
    """Residual bottleneck (1x1 -> 3x3 -> 1x1, 4x expansion) with a pooled shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: width of the inner convolutions; the block emits
            ``out_channels * 4`` channels.
        stride: stride of the 3x3 convolution.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: when true the projection shortcut skips the average pool
            (``is_vd_mode=False``); otherwise the pool is enabled.
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
        is_dcn: forwarded to the 3x3 ``ConvBNLayer``, which is built with
            ``dcn_groups=2``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None,
                 is_dcn=False):
        super().__init__()
        expanded = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a",
        )
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b",
            is_dcn=is_dcn,
            dcn_groups=2,
        )
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c",
        )

        if not shortcut:
            # Stride stays 1 here; any downsampling comes from the average
            # pool that ``is_vd_mode`` switches on inside ConvBNLayer.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1",
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + main_branch(inputs))``."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class BasicBlock(nn.Module):
    """Two-layer residual block (3x3 -> 3x3) with a pooled projection shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by both convolutions.
        stride: stride of the first convolution.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: when true the projection shortcut skips the average pool
            (``is_vd_mode=False``); otherwise the pool is enabled.
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super().__init__()
        self.stride = stride

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a",
        )
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b",
        )

        if not shortcut:
            # Stride stays 1 here; any downsampling comes from the average
            # pool that ``is_vd_mode`` switches on inside ConvBNLayer.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1",
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + conv1(conv0(inputs)))``."""
        residual = self.conv1(self.conv0(inputs))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(skip + residual)
class ResNet_vd(nn.Module):
    """ResNet_vd backbone: three-convolution stem followed by four residual stages.

    Args:
        in_channels: channel count of the input image.
        layers: network depth; one of 18, 34, 50, 101, 152, 200.
        dcn_stage: per-stage flags forwarded as ``is_dcn`` to the bottleneck
            blocks; only consulted when ``layers >= 50``. Defaults to all False.
        out_indices: indices of the stages whose outputs are returned by
            ``forward``; defaults to all four stages.
        **kwargs: accepted and ignored.

    Attributes:
        out_channels: channel count of every stage listed in ``out_indices``,
            in stage order.
    """

    def __init__(self,
                 in_channels=3,
                 layers=50,
                 dcn_stage=None,
                 out_indices=None,
                 **kwargs):
        super().__init__()
        self.layers = layers

        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Number of residual units per stage for each supported depth.
        depth = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 12, 48, 3],
        }[layers]

        use_bottleneck = layers >= 50
        expansion = 4 if use_bottleneck else 1
        num_channels = [64, 256, 512, 1024] if use_bottleneck else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.dcn_stage = [False, False, False, False] if dcn_stage is None else dcn_stage
        self.out_indices = [0, 1, 2, 3] if out_indices is None else out_indices

        # Stem: 3x3 stride-2 conv, then two 3x3 stride-1 convs, then max pool.
        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1",
        )
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2",
        )
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3",
        )
        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stages = nn.ModuleList()
        self.out_channels = []

        for block, unit_count in enumerate(depth):
            stage = nn.Sequential()
            stage_width = num_filters[block] * expansion
            if use_bottleneck:
                is_dcn = self.dcn_stage[block]

            for i in range(unit_count):
                # Layer-name prefix: the third stage of the 101/152 variants
                # uses "a", "b1", "b2", ...; everything else uses letters.
                if layers in [101, 152] and block == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block + 2) + suffix

                unit_in = num_channels[block] if i == 0 else stage_width
                # Only the first unit of stages 1..3 downsamples.
                unit_stride = 2 if i == 0 and block != 0 else 1
                # The first unit of each stage needs a projection shortcut.
                identity_skip = i != 0

                if use_bottleneck:
                    unit = BottleneckBlock(
                        in_channels=unit_in,
                        out_channels=num_filters[block],
                        stride=unit_stride,
                        shortcut=identity_skip,
                        if_first=block == i == 0,
                        name=conv_name,
                        is_dcn=is_dcn,
                    )
                else:
                    unit = BasicBlock(
                        in_channels=unit_in,
                        out_channels=num_filters[block],
                        stride=unit_stride,
                        shortcut=identity_skip,
                        if_first=block == i == 0,
                        name=conv_name,
                    )
                stage.add_module('bb_%d_%d' % (block, i), unit)

            if block in self.out_indices:
                self.out_channels.append(stage_width)
            self.stages.append(stage)

    def forward(self, inputs):
        """Run the stem and all stages; return the outputs of the selected stages."""
        y = self.conv1_3(self.conv1_2(self.conv1_1(inputs)))
        y = self.pool2d_max(y)
        selected = []
        for idx, stage in enumerate(self.stages):
            y = stage(y)
            if idx in self.out_indices:
                selected.append(y)
        return selected
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
# import paddle
# from paddle import ParamAttr
# import paddle.nn as nn
# import paddle.nn.functional as F
__all__ = ["ResNet_SAST"]
class ConvBNLayer(nn.Module):
    """Convolution + batch norm + optional activation, with optional input pooling.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count of the output.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
        is_vd_mode: if true, the input is first reduced by a 2x2 stride-2
            average pool (``ceil_mode=True``).
        act: activation type handed to ``Activation``; a falsy value
            disables the activation.
        name: accepted for signature compatibility; it does not influence the
            layer, so the default ``None`` is valid.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            bias=False)
        # The former ``bn_name`` local was never used, and computing it
        # (``name[3:]``) raised TypeError for the default ``name=None``.
        self._batch_norm = nn.BatchNorm2d(out_channels)
        self.act = act
        if act is not None:
            self._act = Activation(act_type=act)

    def forward(self, inputs):
        """Apply (optional pool) -> conv -> batch norm -> (optional activation)."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self.act:
            y = self._act(y)
        return y
class BottleneckBlock(nn.Module):
    """Residual bottleneck (1x1 -> 3x3 -> 1x1, 4x expansion) with a pooled shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: width of the inner convolutions; the block emits
            ``out_channels * 4`` channels.
        stride: stride of the 3x3 convolution.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: when true the projection shortcut skips the average pool
            (``is_vd_mode=False``); otherwise the pool is enabled.
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super().__init__()
        expanded = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a",
        )
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b",
        )
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c",
        )

        if not shortcut:
            # Stride stays 1 here; any downsampling comes from the average
            # pool that ``is_vd_mode`` switches on inside ConvBNLayer.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1",
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + main_branch(inputs))``."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class BasicBlock(nn.Module):
    """Two-layer residual block (3x3 -> 3x3) with a pooled projection shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by both convolutions.
        stride: stride of the first convolution.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: when true the projection shortcut skips the average pool
            (``is_vd_mode=False``); otherwise the pool is enabled.
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super().__init__()
        self.stride = stride

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a",
        )
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b",
        )

        if not shortcut:
            # Stride stays 1 here; any downsampling comes from the average
            # pool that ``is_vd_mode`` switches on inside ConvBNLayer.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1",
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + conv1(conv0(inputs)))``."""
        residual = self.conv1(self.conv0(inputs))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class ResNet_SAST(nn.Module):
    """ResNet_vd-style backbone that returns every intermediate feature map.

    The 34- and 50-layer variants carry a fifth residual stage; the other
    depths have four.

    Args:
        in_channels: channel count of the input image.
        layers: network depth; one of 18, 34, 50, 101, 152, 200.
        **kwargs: accepted and ignored.

    Attributes:
        out_channels: ``[3, 64]`` followed by the channel count of each
            residual stage, mirroring the list returned by ``forward``.
    """

    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet_SAST, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Number of residual units per stage for each supported depth.
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]

        use_bottleneck = layers >= 50
        expansion = 4 if use_bottleneck else 1
        # Input width of each stage. The shallow list needs a fifth entry
        # (512, the width the fourth stage emits) because layers=34 has five
        # stages; with only four entries it raised IndexError.
        num_channels = ([64, 256, 512, 1024, 2048]
                        if use_bottleneck else [64, 64, 128, 256, 512])
        num_filters = [64, 128, 256, 512, 512]

        # Stem: 3x3 stride-2 conv, then two 3x3 stride-1 convs, then max pool.
        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stages = nn.ModuleList()
        # Entries for the raw input and the stem output, which forward()
        # also returns ahead of the stage outputs.
        self.out_channels = [3, 64]

        for block in range(len(depth)):
            block_list = nn.Sequential()
            stage_width = num_filters[block] * expansion
            for i in range(depth[block]):
                # Layer-name prefix: the third stage of the 101/152 variants
                # uses "a", "b1", "b2", ...; everything else uses letters.
                if layers in [101, 152] and block == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block + 2) + suffix

                unit_cls = BottleneckBlock if use_bottleneck else BasicBlock
                unit = unit_cls(
                    in_channels=num_channels[block] if i == 0 else stage_width,
                    out_channels=num_filters[block],
                    # Only the first unit of stages after the first downsamples.
                    stride=2 if i == 0 and block != 0 else 1,
                    # The first unit of each stage needs a projection shortcut.
                    shortcut=i != 0,
                    if_first=block == i == 0,
                    name=conv_name)
                block_list.add_module('bb_%d_%d' % (block, i), unit)
            self.out_channels.append(stage_width)
            self.stages.append(block_list)

    def forward(self, inputs):
        """Return ``[inputs, stem output, stage outputs...]`` in that order."""
        out = [inputs]
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        out.append(y)
        y = self.pool2d_max(y)
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
__all__ = ["ResNet"]
class ConvBNLayer(nn.Module):
    """Convolution + batch norm + optional activation.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count of the output.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
        is_vd_mode: stored on the instance; ``forward`` does not apply the
            average pool, so this flag has no effect on the output.
        act: activation type handed to ``Activation``; ``None`` disables it.
        name: accepted for signature compatibility; it does not influence the
            layer, so the default ``None`` is valid.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        # Constructed but not used by forward().
        self._pool2d_avg = nn.AvgPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            bias=False)
        # The former ``bn_name`` local was never used, and computing it
        # (``name[3:]``) raised TypeError for the default ``name=None``.
        self._batch_norm = nn.BatchNorm2d(out_channels)
        self.act = act
        if self.act is not None:
            self._act = Activation(act_type=self.act, inplace=True)

    def forward(self, inputs):
        """Apply conv -> batch norm -> (optional activation)."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self.act is not None:
            y = self._act(y)
        return y
class BottleneckBlock(nn.Module):
    """Residual bottleneck (1x1 -> 3x3 -> 1x1, 4x expansion) with a strided shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: width of the inner convolutions; the block emits
            ``out_channels * 4`` channels.
        stride: stride of the 3x3 convolution and of the projection shortcut.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: controls the ``is_vd_mode`` flag handed to the projection
            shortcut (``False`` when true, ``True`` otherwise).
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super().__init__()
        expanded = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a",
        )
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b",
        )
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c",
        )

        if not shortcut:
            # The shortcut follows the block's stride so both branches end
            # up with the same spatial size.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=stride,
                is_vd_mode=not if_first,
                name=name + "_branch1",
            )

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + main_branch(inputs))``."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        skip = inputs if self.shortcut else self.short(inputs)
        return F.relu(torch.add(skip, residual))
class BasicBlock(nn.Module):
    """Two-layer residual block (3x3 -> 3x3) with a strided projection shortcut.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by both convolutions.
        stride: stride of the first convolution and of the projection shortcut.
        shortcut: if True the input is added back unchanged; if False it is
            projected by a 1x1 ``ConvBNLayer`` first.
        if_first: controls the ``is_vd_mode`` flag handed to the projection
            shortcut (``False`` when true, ``True`` otherwise).
        name: prefix for the layer names; must be a string because suffixes
            are concatenated onto it.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            # ConvBNLayer in this file never applies the vd average pool, so
            # the shortcut itself must carry the block's stride. With the
            # previous fixed stride of 1, a stride-2 block produced branches
            # of different spatial size and the add below failed. This now
            # matches BottleneckBlock in this file.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=stride,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(skip(inputs) + conv1(conv0(inputs)))``."""
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = torch.add(short, conv1)
        y = F.relu(y)
        return y
class ResNet(nn.Module):
    """ResNet backbone that returns every intermediate feature map.

    The 34- and 50-layer variants carry a fifth residual stage; the other
    depths have four.

    Args:
        in_channels: channel count of the input image.
        layers: network depth; one of 18, 34, 50, 101, 152, 200.
        **kwargs: accepted and ignored.

    Attributes:
        out_channels: ``[3, 64]`` followed by the channel count of each
            residual stage, mirroring the list returned by ``forward``.
    """

    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Number of residual units per stage for each supported depth.
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]

        use_bottleneck = layers >= 50
        expansion = 4 if use_bottleneck else 1
        # Input width of each stage. The shallow list needs a fifth entry
        # (512, the width the fourth stage emits) because layers=34 has five
        # stages; with only four entries it raised IndexError.
        num_channels = ([64, 256, 512, 1024, 2048]
                        if use_bottleneck else [64, 64, 128, 256, 512])
        num_filters = [64, 128, 256, 512, 512]

        # Stem: a single 7x7 stride-2 convolution followed by a max pool.
        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act='relu',
            name="conv1_1")
        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stages = nn.ModuleList()
        # Entries for the raw input and the stem output, which forward()
        # also returns ahead of the stage outputs.
        self.out_channels = [3, 64]

        for block in range(len(depth)):
            block_list = nn.Sequential()
            stage_width = num_filters[block] * expansion
            for i in range(depth[block]):
                # Layer-name prefix: the third stage of the 101/152 variants
                # uses "a", "b1", "b2", ...; everything else uses letters.
                if layers in [101, 152] and block == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block + 2) + suffix

                unit_cls = BottleneckBlock if use_bottleneck else BasicBlock
                unit = unit_cls(
                    in_channels=num_channels[block] if i == 0 else stage_width,
                    out_channels=num_filters[block],
                    # Only the first unit of stages after the first downsamples.
                    stride=2 if i == 0 and block != 0 else 1,
                    # The first unit of each stage needs a projection shortcut.
                    shortcut=i != 0,
                    if_first=block == i == 0,
                    name=conv_name)
                block_list.add_module('bb_%d_%d' % (block, i), unit)
            self.out_channels.append(stage_width)
            self.stages.append(block_list)

    def forward(self, inputs):
        """Return ``[inputs, stem output, stage outputs...]`` in that order."""
        out = [inputs]
        y = self.conv1_1(inputs)
        out.append(y)
        y = self.pool2d_max(y)
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
"""
This code is adapted from:
https://github.com/LBH1024/CAN/models/densenet.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
    """Dense-block unit: 1x1 conv then 3x3 conv, output concatenated onto the input.

    Args:
        nChannels: channel count of the input.
        growthRate: channel count appended to the input by this unit; the
            intermediate 1x1 convolution is ``4 * growthRate`` wide.
        use_dropout: if true, dropout (p=0.2) follows each conv/BN/ReLU step.
    """

    def __init__(self, nChannels, growthRate, use_dropout):
        super().__init__()
        inter_width = 4 * growthRate
        self.bn1 = nn.BatchNorm2d(inter_width)
        self.conv1 = nn.Conv2d(nChannels, inter_width, kernel_size=1, bias=True)
        self.bn2 = nn.BatchNorm2d(growthRate)
        self.conv2 = nn.Conv2d(
            inter_width, growthRate, kernel_size=3, padding=1, bias=True)
        self.use_dropout = use_dropout
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return ``x`` with ``growthRate`` new feature channels appended on dim 1."""
        feat = x
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            feat = F.relu(norm(conv(feat)))
            if self.use_dropout:
                feat = self.dropout(feat)
        return torch.cat([x, feat], 1)
class SingleLayer(nn.Module):
    """Dense-block unit with a single 3x3 conv, output concatenated onto the input.

    Args:
        nChannels: channel count of the input.
        growthRate: channel count appended to the input by this unit.
        use_dropout: if true, dropout (p=0.2) follows the convolution.
    """

    def __init__(self, nChannels, growthRate, use_dropout):
        super().__init__()
        # bn1 is registered as a sub-module but forward() does not call it.
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(
            nChannels, growthRate, kernel_size=3, padding=1, bias=False)
        self.use_dropout = use_dropout
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return ``x`` with ``growthRate`` new feature channels appended on dim 1."""
        new_feat = self.conv1(F.relu(x))
        if self.use_dropout:
            new_feat = self.dropout(new_feat)
        return torch.cat([x, new_feat], 1)
class Transition(nn.Module):
    """Transition between dense blocks: 1x1 conv, BN, ReLU, then 2x average pool.

    Args:
        nChannels: channel count of the input.
        out_channels: channel count of the output.
        use_dropout: if true, dropout (p=0.2) is applied before pooling.
    """

    def __init__(self, nChannels, out_channels, use_dropout):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv1 = nn.Conv2d(
            nChannels, out_channels, kernel_size=1, bias=False)
        self.use_dropout = use_dropout
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Project ``x`` to ``out_channels`` and halve its spatial size (rounded up)."""
        feat = F.relu(self.bn1(self.conv1(x)))
        if self.use_dropout:
            feat = self.dropout(feat)
        return F.avg_pool2d(feat, 2, ceil_mode=True, count_include_pad=False)
class DenseNet(nn.Module):
    """DenseNet backbone: stem conv, then three 16-unit dense blocks with two transitions.

    Args:
        growthRate: channels added by every dense unit.
        reduction: factor applied to the channel count at each transition
            (the result is floored to an int).
        bottleneck: if true the dense units are ``Bottleneck``; otherwise
            ``SingleLayer``.
        use_dropout: forwarded to every dense unit and transition.
        input_channel: channel count of the input image.
        **kwargs: accepted and ignored.

    Attributes:
        out_channels: the output width of the second transition.
    """

    def __init__(self, growthRate, reduction, bottleneck, use_dropout,
                 input_channel, **kwargs):
        super().__init__()

        nDenseBlocks = 16
        block_growth = nDenseBlocks * growthRate
        width = 2 * growthRate

        self.conv1 = nn.Conv2d(
            input_channel,
            width,
            kernel_size=7,
            padding=3,
            stride=2,
            bias=False)

        # Dense block 1 followed by its transition.
        self.dense1 = self._make_dense(width, growthRate, nDenseBlocks,
                                       bottleneck, use_dropout)
        width += block_growth
        out_channels = int(math.floor(width * reduction))
        self.trans1 = Transition(width, out_channels, use_dropout)
        width = out_channels

        # Dense block 2 followed by its transition.
        self.dense2 = self._make_dense(width, growthRate, nDenseBlocks,
                                       bottleneck, use_dropout)
        width += block_growth
        out_channels = int(math.floor(width * reduction))
        self.trans2 = Transition(width, out_channels, use_dropout)
        width = out_channels

        # Dense block 3 has no transition after it.
        self.dense3 = self._make_dense(width, growthRate, nDenseBlocks,
                                       bottleneck, use_dropout)
        # NOTE(review): this records the trans2 output width, not the width
        # after dense3 has appended its channels - confirm that downstream
        # consumers expect this value.
        self.out_channels = out_channels

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck,
                    use_dropout):
        """Build one dense block of ``nDenseBlocks`` units, each ``growthRate`` wider."""
        unit_cls = Bottleneck if bottleneck else SingleLayer
        units = [
            unit_cls(nChannels + k * growthRate, growthRate, use_dropout)
            for k in range(int(nDenseBlocks))
        ]
        return nn.Sequential(*units)

    def forward(self, inputs):
        """Transform the first element of ``inputs``; pass the other two through.

        Args:
            inputs: a 3-item sequence ``(x, x_m, y)``; only ``x`` is processed.

        Returns:
            ``(features, x_m, y)``.
        """
        x, x_m, y = inputs
        feat = F.relu(self.conv1(x), inplace=True)
        feat = F.max_pool2d(feat, 2, ceil_mode=True)
        feat = self.trans1(self.dense1(feat))
        feat = self.trans2(self.dense2(feat))
        feat = self.dense3(feat)
        return feat, x_m, y
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import nn
class ConvBNAct(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups=1,
use_act=True):
def __init__(
self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True
):
super().__init__()
self.use_act = use_act
self.conv = nn.Conv2d(
......@@ -20,7 +16,8 @@ class ConvBNAct(nn.Module):
stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
bias=False,
)
self.bn = nn.BatchNorm2d(out_channels)
if self.use_act:
self.act = nn.ReLU()
......@@ -42,7 +39,8 @@ class ESEModule(nn.Module):
out_channels=channels,
kernel_size=1,
stride=1,
padding=0)
padding=0,
)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
......@@ -55,12 +53,13 @@ class ESEModule(nn.Module):
class HG_Block(nn.Module):
def __init__(
self,
in_channels,
mid_channels,
out_channels,
layer_num,
identity=False, ):
self,
in_channels,
mid_channels,
out_channels,
layer_num,
identity=False,
):
super().__init__()
self.identity = identity
......@@ -70,14 +69,18 @@ class HG_Block(nn.Module):
in_channels=in_channels,
out_channels=mid_channels,
kernel_size=3,
stride=1))
stride=1,
)
)
for _ in range(layer_num - 1):
self.layers.append(
ConvBNAct(
in_channels=mid_channels,
out_channels=mid_channels,
kernel_size=3,
stride=1))
stride=1,
)
)
# feature aggregation
total_channels = in_channels + layer_num * mid_channels
......@@ -85,7 +88,8 @@ class HG_Block(nn.Module):
in_channels=total_channels,
out_channels=out_channels,
kernel_size=1,
stride=1)
stride=1,
)
self.att = ESEModule(out_channels)
def forward(self, x):
......@@ -104,14 +108,16 @@ class HG_Block(nn.Module):
class HG_Stage(nn.Module):
def __init__(self,
in_channels,
mid_channels,
out_channels,
block_num,
layer_num,
downsample=True,
stride=[2, 1]):
def __init__(
self,
in_channels,
mid_channels,
out_channels,
block_num,
layer_num,
downsample=True,
stride=[2, 1],
):
super().__init__()
self.downsample = downsample
if downsample:
......@@ -121,24 +127,19 @@ class HG_Stage(nn.Module):
kernel_size=3,
stride=stride,
groups=in_channels,
use_act=False)
use_act=False,
)
blocks_list = []
blocks_list.append(
HG_Block(
in_channels,
mid_channels,
out_channels,
layer_num,
identity=False))
HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False)
)
for _ in range(block_num - 1):
blocks_list.append(
HG_Block(
out_channels,
mid_channels,
out_channels,
layer_num,
identity=True))
out_channels, mid_channels, out_channels, layer_num, identity=True
)
)
self.blocks = nn.Sequential(*blocks_list)
def forward(self, x):
......@@ -164,29 +165,31 @@ class PPHGNet(nn.Module):
"""
def __init__(
self,
stem_channels,
stage_config,
layer_num,
in_channels=3,
det=False,
out_indices=None):
self,
stem_channels,
stage_config,
layer_num,
in_channels=3,
det=False,
out_indices=None,
):
super().__init__()
self.det = det
self.out_indices = out_indices if out_indices is not None else [
0, 1, 2, 3
]
self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]
# stem
stem_channels.insert(0, in_channels)
self.stem = nn.Sequential(* [
ConvBNAct(
in_channels=stem_channels[i],
out_channels=stem_channels[i + 1],
kernel_size=3,
stride=2 if i == 0 else 1) for i in range(
len(stem_channels) - 1)
])
self.stem = nn.Sequential(
*[
ConvBNAct(
in_channels=stem_channels[i],
out_channels=stem_channels[i + 1],
kernel_size=3,
stride=2 if i == 0 else 1,
)
for i in range(len(stem_channels) - 1)
]
)
if self.det:
self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
......@@ -194,11 +197,25 @@ class PPHGNet(nn.Module):
self.stages = nn.ModuleList()
self.out_channels = []
for block_id, k in enumerate(stage_config):
in_channels, mid_channels, out_channels, block_num, downsample, stride = stage_config[
k]
(
in_channels,
mid_channels,
out_channels,
block_num,
downsample,
stride,
) = stage_config[k]
self.stages.append(
HG_Stage(in_channels, mid_channels, out_channels, block_num,
layer_num, downsample, stride))
HG_Stage(
in_channels,
mid_channels,
out_channels,
block_num,
layer_num,
downsample,
stride,
)
)
if block_id in self.out_indices:
self.out_channels.append(out_channels)
......@@ -237,32 +254,6 @@ class PPHGNet(nn.Module):
return x
def PPHGNet_tiny(pretrained=False, use_ssld=False, **kwargs):
"""
PPHGNet_tiny
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPHGNet_tiny` model depends on args.
"""
stage_config = {
# in_channels, mid_channels, out_channels, blocks, downsample
"stage1": [96, 96, 224, 1, False, [2, 1]],
"stage2": [224, 128, 448, 1, True, [1, 2]],
"stage3": [448, 160, 512, 2, True, [2, 1]],
"stage4": [512, 192, 768, 1, True, [2, 1]],
}
model = PPHGNet(
stem_channels=[48, 48, 96],
stage_config=stage_config,
layer_num=5,
**kwargs)
return model
def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
"""
PPHGNet_small
......@@ -294,31 +285,6 @@ def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
stage_config=stage_config_det if det else stage_config_rec,
layer_num=6,
det=det,
**kwargs)
return model
def PPHGNet_base(pretrained=False, use_ssld=True, **kwargs):
"""
PPHGNet_base
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPHGNet_base` model depends on args.
"""
stage_config = {
# in_channels, mid_channels, out_channels, blocks, downsample
"stage1": [160, 192, 320, 1, False, [2, 1]],
"stage2": [320, 224, 640, 2, True, [1, 2]],
"stage3": [640, 256, 960, 3, True, [2, 1]],
"stage4": [960, 288, 1280, 2, True, [2, 1]],
}
model = PPHGNet(
stem_channels=[96, 96, 160],
stage_config=stage_config,
layer_num=7,
**kwargs)
**kwargs
)
return model
......@@ -12,43 +12,54 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
from torch import nn
# from paddle.nn.initializer import Constant, KaimingNormal
# from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Dropout, Hardsigmoid, Hardswish, Identity, Linear, ReLU
# from paddle.regularizer import L2Decay
from ..common import Activation
NET_CONFIG_det = {
"blocks2":
#k, in_c, out_c, s, use_se
# k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5":
[[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True],
[5, 512, 512, 1, False], [5, 512, 512, 1, False]]
"blocks5": [
[3, 128, 256, 2, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
],
"blocks6": [
[5, 256, 512, 2, True],
[5, 512, 512, 1, True],
[5, 512, 512, 1, False],
[5, 512, 512, 1, False],
],
}
NET_CONFIG_rec = {
"blocks2":
#k, in_c, out_c, s, use_se
# k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
"blocks5":
[[3, 128, 256, (1, 2), False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, (2, 1), True], [5, 512, 512, 1, True],
[5, 512, 512, (2, 1), False], [5, 512, 512, 1, False]]
"blocks5": [
[3, 128, 256, (1, 2), False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
[5, 256, 256, 1, False],
],
"blocks6": [
[5, 256, 512, (2, 1), True],
[5, 512, 512, 1, True],
[5, 512, 512, (2, 1), False],
[5, 512, 512, 1, False],
],
}
......@@ -62,8 +73,7 @@ def make_divisible(v, divisor=16, min_value=None):
class LearnableAffineBlock(nn.Module):
def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0,
lab_lr=0.1):
def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
super().__init__()
self.scale = nn.Parameter(torch.Tensor([scale_value]))
self.bias = nn.Parameter(torch.Tensor([bias_value]))
......@@ -73,13 +83,9 @@ class LearnableAffineBlock(nn.Module):
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups=1,
lr_mult=1.0):
def __init__(
self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
):
super().__init__()
self.conv = nn.Conv2d(
in_channels=in_channels,
......@@ -88,7 +94,8 @@ class ConvBNLayer(nn.Module):
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
bias=False,
)
self.bn = nn.BatchNorm2d(
out_channels,
......@@ -115,15 +122,17 @@ class Act(nn.Module):
class LearnableRepLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
num_conv_branches=1,
lr_mult=1.0,
lab_lr=0.1):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
num_conv_branches=1,
lr_mult=1.0,
lab_lr=0.1,
):
super().__init__()
self.is_repped = False
self.groups = groups
......@@ -134,27 +143,35 @@ class LearnableRepLayer(nn.Module):
self.num_conv_branches = num_conv_branches
self.padding = (kernel_size - 1) // 2
self.identity = nn.BatchNorm2d(
num_features=in_channels,
) if out_channels == in_channels and stride == 1 else None
self.identity = (
nn.BatchNorm2d(
num_features=in_channels,
)
if out_channels == in_channels and stride == 1
else None
)
self.conv_kxk = nn.ModuleList(
[
ConvBNLayer(
in_channels,
out_channels,
kernel_size,
stride,
groups=groups,
lr_mult=lr_mult,
)
for _ in range(self.num_conv_branches)
]
)
self.conv_kxk = nn.ModuleList([
self.conv_1x1 = (
ConvBNLayer(
in_channels,
out_channels,
kernel_size,
stride,
groups=groups,
lr_mult=lr_mult) for _ in range(self.num_conv_branches)
])
self.conv_1x1 = ConvBNLayer(
in_channels,
out_channels,
1,
stride,
groups=groups,
lr_mult=lr_mult) if kernel_size > 1 else None
in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
)
if kernel_size > 1
else None
)
self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
......@@ -192,7 +209,8 @@ class LearnableRepLayer(nn.Module):
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
groups=self.groups)
groups=self.groups,
)
self.reparam_conv.weight.data = kernel
self.reparam_conv.bias.data = bias
self.is_repped = True
......@@ -205,8 +223,9 @@ class LearnableRepLayer(nn.Module):
def _get_kernel_bias(self):
kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1,
self.kernel_size // 2)
kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
kernel_conv_1x1, self.kernel_size // 2
)
kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
......@@ -233,15 +252,16 @@ class LearnableRepLayer(nn.Module):
eps = branch.bn._epsilon
else:
assert isinstance(branch, nn.BatchNorm2d)
if not hasattr(self, 'id_tensor'):
if not hasattr(self, "id_tensor"):
input_dim = self.in_channels // self.groups
kernel_value = torch.zeros(
(self.in_channels, input_dim, self.kernel_size,
self.kernel_size),
dtype=branch.weight.dtype)
(self.in_channels, input_dim, self.kernel_size, self.kernel_size),
dtype=branch.weight.dtype,
)
for i in range(self.in_channels):
kernel_value[i, i % input_dim, self.kernel_size // 2,
self.kernel_size // 2] = 1
kernel_value[
i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
] = 1
self.id_tensor = kernel_value
kernel = self.id_tensor
running_mean = branch._mean
......@@ -287,15 +307,17 @@ class SELayer(nn.Module):
class LCNetV3Block(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
dw_size,
use_se=False,
conv_kxk_num=4,
lr_mult=1.0,
lab_lr=0.1):
def __init__(
self,
in_channels,
out_channels,
stride,
dw_size,
use_se=False,
conv_kxk_num=4,
lr_mult=1.0,
lab_lr=0.1,
):
super().__init__()
self.use_se = use_se
self.dw_conv = LearnableRepLayer(
......@@ -306,7 +328,8 @@ class LCNetV3Block(nn.Module):
groups=in_channels,
num_conv_branches=conv_kxk_num,
lr_mult=lr_mult,
lab_lr=lab_lr)
lab_lr=lab_lr,
)
if use_se:
self.se = SELayer(in_channels, lr_mult=lr_mult)
self.pw_conv = LearnableRepLayer(
......@@ -316,7 +339,8 @@ class LCNetV3Block(nn.Module):
stride=1,
num_conv_branches=conv_kxk_num,
lr_mult=lr_mult,
lab_lr=lab_lr)
lab_lr=lab_lr,
)
def forward(self, x):
x = self.dw_conv(x)
......@@ -327,13 +351,15 @@ class LCNetV3Block(nn.Module):
class PPLCNetV3(nn.Module):
def __init__(self,
scale=1.0,
conv_kxk_num=4,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
lab_lr=0.1,
det=False,
**kwargs):
def __init__(
self,
scale=1.0,
conv_kxk_num=4,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
lab_lr=0.1,
det=False,
**kwargs
):
super().__init__()
self.scale = scale
self.lr_mult_list = lr_mult_list
......@@ -341,90 +367,102 @@ class PPLCNetV3(nn.Module):
self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
assert isinstance(self.lr_mult_list, (
list, tuple
)), "lr_mult_list should be in (list, tuple) but got {}".format(
type(self.lr_mult_list))
assert len(self.lr_mult_list
) == 6, "lr_mult_list length should be 6 but got {}".format(
len(self.lr_mult_list))
assert isinstance(
self.lr_mult_list, (list, tuple)
), "lr_mult_list should be in (list, tuple) but got {}".format(
type(self.lr_mult_list)
)
assert (
len(self.lr_mult_list) == 6
), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=make_divisible(16 * scale),
kernel_size=3,
stride=2,
lr_mult=self.lr_mult_list[0])
self.blocks2 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[1],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks2"])
])
self.blocks3 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[2],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks3"])
])
self.blocks4 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[3],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks4"])
])
self.blocks5 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[4],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks5"])
])
self.blocks6 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[5],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks6"])
])
lr_mult=self.lr_mult_list[0],
)
self.blocks2 = nn.Sequential(
*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[1],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
]
)
self.blocks3 = nn.Sequential(
*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[2],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
]
)
self.blocks4 = nn.Sequential(
*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[3],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
]
)
self.blocks5 = nn.Sequential(
*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[4],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
]
)
self.blocks6 = nn.Sequential(
*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[5],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
]
)
self.out_channels = make_divisible(512 * scale)
if self.det:
......@@ -436,15 +474,19 @@ class PPLCNetV3(nn.Module):
make_divisible(self.net_config["blocks6"][-1][2] * scale),
]
self.layer_list = nn.ModuleList([
nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0)
])
self.layer_list = nn.ModuleList(
[
nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
]
)
self.out_channels = [
int(mv_c[0] * scale), int(mv_c[1] * scale),
int(mv_c[2] * scale), int(mv_c[3] * scale)
int(mv_c[0] * scale),
int(mv_c[1] * scale),
int(mv_c[2] * scale),
int(mv_c[3] * scale),
]
def forward(self, x):
......
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
# Stage layout selected by PPLCNetV3 when it is built with det=True.
# Every row reads: [k, in_c, out_c, s, use_se]
#   k      - depthwise kernel size
#   in_c   - input channels (before width scaling)
#   out_c  - output channels (before width scaling)
#   s      - stride of the depthwise convolution
#   use_se - whether the block inserts an SE layer
NET_CONFIG_det = {
    "blocks2": [
        [3, 16, 32, 1, False],
    ],
    "blocks3": [
        [3, 32, 64, 2, False],
        [3, 64, 64, 1, False],
    ],
    "blocks4": [
        [3, 64, 128, 2, False],
        [3, 128, 128, 1, False],
    ],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
        [5, 512, 512, 1, False],
        [5, 512, 512, 1, False],
    ],
}
# Stage layout selected by PPLCNetV3 when it is built with det=False.
# Every row reads: [k, in_c, out_c, s, use_se]; here the stride `s` may be
# either an int or a 2-tuple.
NET_CONFIG_rec = {
    "blocks2": [
        [3, 16, 32, 1, False],
    ],
    "blocks3": [
        [3, 32, 64, 1, False],
        [3, 64, 64, 1, False],
    ],
    "blocks4": [
        [3, 64, 128, (2, 1), False],
        [3, 128, 128, 1, False],
    ],
    "blocks5": [
        [3, 128, 256, (1, 2), False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, (2, 1), True],
        [5, 512, 512, 1, True],
        [5, 512, 512, (2, 1), False],
        [5, 512, 512, 1, False],
    ],
}
def make_divisible(v, divisor=16, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v: Value to round (typically a scaled channel count).
        divisor: The result is built from multiples of this number.
        min_value: Lower bound for the result; falls back to ``divisor``
            when ``None``.

    Returns:
        The rounded value, bumped up by one extra ``divisor`` if rounding
        would otherwise leave it below 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Never shrink the requested value by more than 10%.
    if result < 0.9 * v:
        return result + divisor
    return result
class LearnableAffineBlock(nn.Module):
    """Learnable scalar affine transform: ``scale * x + bias``.

    Args:
        scale_value: Initial value of the single-element ``scale`` parameter.
        bias_value: Initial value of the single-element ``bias`` parameter.
        lr_mult: Accepted for signature compatibility; not used here.
        lab_lr: Accepted for signature compatibility; not used here.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        initial_scale = torch.Tensor([scale_value])
        initial_bias = torch.Tensor([bias_value])
        self.scale = nn.Parameter(initial_scale)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Apply the affine transform to ``x``."""
        scaled = self.scale * x
        return scaled + self.bias
class ConvBNLayer(nn.Module):
    """Bias-free 2-D convolution followed by batch normalisation.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and BN features.
        kernel_size: Integer kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: Convolution stride.
        groups: Convolution groups.
        lr_mult: Accepted for signature compatibility; not used here.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
    ):
        super().__init__()
        half_kernel_pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=half_kernel_pad,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels)

    def forward(self, x):
        """Run convolution, then batch norm."""
        return self.bn(self.conv(x))
class Act(nn.Module):
    """Activation followed by a learnable affine block.

    Args:
        act: Activation name; must be ``"hard_swish"`` or ``"relu"``.
        lr_mult: Forwarded to ``LearnableAffineBlock``.
        lab_lr: Forwarded to ``LearnableAffineBlock``.
    """

    def __init__(self, act="hard_swish", lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        supported = ("hard_swish", "relu")
        assert act in supported
        self.act = Activation(act)
        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        """Apply the activation, then the affine block."""
        activated = self.act(x)
        return self.lab(activated)
class LearnableRepLayer(nn.Module):
    """Multi-branch convolution layer that can be fused into one convolution.

    Before :meth:`rep` is called, ``forward`` sums the outputs of:

    * an optional BatchNorm "identity" branch (only when
      ``out_channels == in_channels`` and ``stride == 1``),
    * an optional 1x1 ``ConvBNLayer`` branch (only when ``kernel_size > 1``),
    * ``num_conv_branches`` kxk ``ConvBNLayer`` branches,

    then applies a ``LearnableAffineBlock`` and, unless ``stride == 2``, an
    ``Act``. After :meth:`rep`, the branches are replaced in the forward pass
    by the single fused ``self.reparam_conv``.

    Args:
        in_channels: Input channels.
        out_channels: Output channels.
        kernel_size: Integer kernel size of the kxk branches.
        stride: Convolution stride.
        groups: Convolution groups.
        num_conv_branches: Number of parallel kxk branches.
        lr_mult: Forwarded to the sub-layers.
        lab_lr: Forwarded to the affine/activation sub-layers.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        groups=1,
        num_conv_branches=1,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.is_repped = False
        self.groups = groups
        self.stride = stride
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_conv_branches = num_conv_branches
        self.padding = (kernel_size - 1) // 2

        # The identity branch is only shape-compatible when the layer keeps
        # both channel count and spatial size.
        self.identity = (
            nn.BatchNorm2d(in_channels)
            if out_channels == in_channels and stride == 1
            else None
        )

        self.conv_kxk = nn.ModuleList(
            [
                ConvBNLayer(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride,
                    groups=groups,
                    lr_mult=lr_mult,
                )
                for _ in range(self.num_conv_branches)
            ]
        )

        # A separate 1x1 branch would duplicate the kxk branch when k == 1.
        self.conv_1x1 = (
            ConvBNLayer(
                in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
            )
            if kernel_size > 1
            else None
        )

        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        """Run the fused convolution if available, else sum all branches."""
        # for export
        if self.is_repped:
            out = self.lab(self.reparam_conv(x))
            if self.stride != 2:
                out = self.act(out)
            return out

        out = 0
        if self.identity is not None:
            out += self.identity(x)

        if self.conv_1x1 is not None:
            out += self.conv_1x1(x)

        for conv in self.conv_kxk:
            out += conv(x)

        out = self.lab(out)
        if self.stride != 2:
            out = self.act(out)
        return out

    def rep(self):
        """Fuse all branches into ``self.reparam_conv`` (no-op if already done)."""
        if self.is_repped:
            return
        kernel, bias = self._get_kernel_bias()
        self.reparam_conv = nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            groups=self.groups,
        )
        self.reparam_conv.weight.data = kernel
        self.reparam_conv.bias.data = bias
        self.is_repped = True

    def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
        """Zero-pad a 1x1 kernel spatially; returns 0 when there is no kernel."""
        if not isinstance(kernel1x1, torch.Tensor):
            return 0
        return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])

    def _get_kernel_bias(self):
        """Return the summed (kernel, bias) of all BN-fused branches."""
        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
            kernel_conv_1x1, self.kernel_size // 2
        )

        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)

        kernel_conv_kxk = 0
        bias_conv_kxk = 0
        for conv in self.conv_kxk:
            kernel, bias = self._fuse_bn_tensor(conv)
            kernel_conv_kxk += kernel
            bias_conv_kxk += bias

        kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
        bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
        return kernel_reparam, bias_reparam

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's batch-norm statistics into a conv kernel and bias.

        Args:
            branch: ``None``, a ``ConvBNLayer`` or a bare ``nn.BatchNorm2d``
                (the identity branch).

        Returns:
            ``(kernel, bias)`` tensors, or ``(0, 0)`` when ``branch`` is None.
        """
        if branch is None:
            return 0, 0

        if isinstance(branch, ConvBNLayer):
            kernel = branch.conv.weight
            bn = branch.bn
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            bn = branch
            if not hasattr(self, "id_tensor"):
                input_dim = self.in_channels // self.groups
                center = self.kernel_size // 2
                # Built on the branch's device so fusing also works when the
                # module lives on an accelerator rather than on the CPU.
                kernel_value = torch.zeros(
                    (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
                    dtype=bn.weight.dtype,
                    device=bn.weight.device,
                )
                # A kernel with a single 1 at its centre acts as an identity
                # convolution for the matching input channel.
                for i in range(self.in_channels):
                    kernel_value[i, i % input_dim, center, center] = 1
                self.id_tensor = kernel_value
            # The cached tensor may predate a later device move of the module.
            kernel = self.id_tensor.to(bn.weight.device)

        running_mean = bn.running_mean
        running_var = bn.running_var
        gamma = bn.weight
        beta = bn.bias
        eps = bn.eps

        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))
        return kernel * t, beta - running_mean * gamma / std
class SELayer(nn.Module):
    """Channel gate: global pool -> 1x1 conv -> ReLU -> 1x1 conv -> hard sigmoid.

    The resulting per-channel gate is multiplied with the layer input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Divisor applied to ``channel`` for the hidden width.
        lr_mult: Accepted for signature compatibility; not used here.
    """

    def __init__(self, channel, reduction=4, lr_mult=1.0):
        super().__init__()
        hidden = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=channel,
            out_channels=hidden,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            in_channels=hidden,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.hardsigmoid = Activation("hard_sigmoid")

    def forward(self, x):
        """Return ``x`` re-weighted by its channel gate."""
        gate = self.avg_pool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.hardsigmoid(self.conv2(gate))
        return gate * x
class LCNetV3Block(nn.Module):
    """Depthwise rep-layer, optional SE layer, then pointwise rep-layer.

    Args:
        in_channels: Input channels (also the depthwise output channels).
        out_channels: Output channels of the pointwise layer.
        stride: Stride of the depthwise layer.
        dw_size: Kernel size of the depthwise layer.
        use_se: Insert an ``SELayer`` between the two rep-layers.
        conv_kxk_num: Number of parallel branches in each rep-layer.
        lr_mult: Forwarded to the sub-layers.
        lab_lr: Forwarded to the rep-layers.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        stride,
        dw_size,
        use_se=False,
        conv_kxk_num=4,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.use_se = use_se

        # Settings common to the depthwise and pointwise rep-layers.
        shared = dict(
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
            lab_lr=lab_lr,
        )

        self.dw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=dw_size,
            stride=stride,
            groups=in_channels,
            **shared,
        )
        if use_se:
            self.se = SELayer(in_channels, lr_mult=lr_mult)
        self.pw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            **shared,
        )

    def forward(self, x):
        """Run depthwise -> (SE) -> pointwise."""
        out = self.dw_conv(x)
        if self.use_se:
            out = self.se(out)
        return self.pw_conv(out)
class PPLCNetV3(nn.Module):
    """PP-LCNetV3 backbone built from ``NET_CONFIG_det`` / ``NET_CONFIG_rec``.

    Args:
        scale: Width multiplier applied to every configured channel count.
        conv_kxk_num: Number of parallel branches in each rep-layer.
        lr_mult_list: Six multipliers, one for ``conv1`` and one per stage
            (``blocks2`` .. ``blocks6``). Only read, never mutated.
        lab_lr: Forwarded to every ``LCNetV3Block``.
        det: ``True`` selects the detection config and makes ``forward``
            return four projected feature maps; ``False`` selects the
            recognition config and makes ``forward`` return one pooled map.
        **kwargs: Ignored.

    Attributes:
        out_channels: An int when ``det`` is False; a list of four ints
            (the projected channel counts) when ``det`` is True.
    """

    def __init__(
        self,
        scale=1.0,
        conv_kxk_num=4,
        lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        lab_lr=0.1,
        det=False,
        **kwargs
    ):
        super().__init__()
        self.scale = scale
        self.lr_mult_list = lr_mult_list
        self.det = det

        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec

        assert isinstance(
            self.lr_mult_list, (list, tuple)
        ), "lr_mult_list should be in (list, tuple) but got {}".format(
            type(self.lr_mult_list)
        )
        assert (
            len(self.lr_mult_list) == 6
        ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))

        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=make_divisible(16 * scale),
            kernel_size=3,
            stride=2,
            lr_mult=self.lr_mult_list[0],
        )

        # Assigned one by one so the registration order (and therefore the
        # state-dict key order) stays conv1, blocks2, ..., blocks6.
        self.blocks2 = self._make_stage(
            "blocks2", self.lr_mult_list[1], conv_kxk_num, lab_lr
        )
        self.blocks3 = self._make_stage(
            "blocks3", self.lr_mult_list[2], conv_kxk_num, lab_lr
        )
        self.blocks4 = self._make_stage(
            "blocks4", self.lr_mult_list[3], conv_kxk_num, lab_lr
        )
        self.blocks5 = self._make_stage(
            "blocks5", self.lr_mult_list[4], conv_kxk_num, lab_lr
        )
        self.blocks6 = self._make_stage(
            "blocks6", self.lr_mult_list[5], conv_kxk_num, lab_lr
        )

        self.out_channels = make_divisible(512 * scale)

        if self.det:
            mv_c = [16, 24, 56, 480]
            # Channel counts produced by the last block of stages 3..6.
            stage_channels = [
                make_divisible(self.net_config[stage][-1][2] * scale)
                for stage in ("blocks3", "blocks4", "blocks5", "blocks6")
            ]
            projected_channels = [int(c * scale) for c in mv_c]

            # 1x1 convolutions projecting each stage output to mv_c * scale.
            self.layer_list = nn.ModuleList(
                [
                    nn.Conv2d(c_in, c_out, 1, 1, 0)
                    for c_in, c_out in zip(stage_channels, projected_channels)
                ]
            )
            self.out_channels = projected_channels

    def _make_stage(self, stage_key, lr_mult, conv_kxk_num, lab_lr):
        """Build the ``nn.Sequential`` of blocks for one ``net_config`` entry."""
        return nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * self.scale),
                    out_channels=make_divisible(out_c * self.scale),
                    dw_size=k,
                    stride=s,
                    use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=lr_mult,
                    lab_lr=lab_lr,
                )
                for k, in_c, out_c, s, se in self.net_config[stage_key]
            ]
        )

    def forward(self, x):
        """Run the backbone.

        Returns:
            With ``det=True``: a list of four tensors, the outputs of
            ``blocks3`` .. ``blocks6`` after their 1x1 projections.
            With ``det=False``: the ``blocks6`` output, pooled with
            ``adaptive_avg_pool2d(x, [1, 40])`` in training mode and
            ``avg_pool2d(x, [3, 2])`` otherwise.
        """
        out_list = []
        x = self.conv1(x)
        x = self.blocks2(x)
        x = self.blocks3(x)
        out_list.append(x)
        x = self.blocks4(x)
        out_list.append(x)
        x = self.blocks5(x)
        out_list.append(x)
        x = self.blocks6(x)
        out_list.append(x)

        if self.det:
            return [
                project(feature)
                for project, feature in zip(self.layer_list, out_list)
            ]

        if self.training:
            x = F.adaptive_avg_pool2d(x, [1, 40])
        else:
            x = F.avg_pool2d(x, [3, 2])
        return x
\ No newline at end of file
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
from torch import nn
from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible
from .det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible
class MobileNetV3(nn.Module):
def __init__(self,
in_channels=3,
model_name='small',
scale=0.5,
large_stride=None,
small_stride=None,
**kwargs):
def __init__(
self,
in_channels=3,
model_name="small",
scale=0.5,
large_stride=None,
small_stride=None,
**kwargs
):
super(MobileNetV3, self).__init__()
if small_stride is None:
small_stride = [2, 2, 2, 2]
if large_stride is None:
large_stride = [1, 2, 2, 2]
assert isinstance(large_stride, list), "large_stride type must " \
"be list but got {}".format(type(large_stride))
assert isinstance(small_stride, list), "small_stride type must " \
"be list but got {}".format(type(small_stride))
assert len(large_stride) == 4, "large_stride length must be " \
"4 but got {}".format(len(large_stride))
assert len(small_stride) == 4, "small_stride length must be " \
"4 but got {}".format(len(small_stride))
assert isinstance(
large_stride, list
), "large_stride type must " "be list but got {}".format(type(large_stride))
assert isinstance(
small_stride, list
), "small_stride type must " "be list but got {}".format(type(small_stride))
assert (
len(large_stride) == 4
), "large_stride length must be " "4 but got {}".format(len(large_stride))
assert (
len(small_stride) == 4
), "small_stride length must be " "4 but got {}".format(len(small_stride))
if model_name == "large":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', large_stride[0]],
[3, 64, 24, False, 'relu', (large_stride[1], 1)],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', (large_stride[2], 1)],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hard_swish', 1],
[3, 200, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 480, 112, True, 'hard_swish', 1],
[3, 672, 112, True, 'hard_swish', 1],
[5, 672, 160, True, 'hard_swish', (large_stride[3], 1)],
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
[3, 16, 16, False, "relu", large_stride[0]],
[3, 64, 24, False, "relu", (large_stride[1], 1)],
[3, 72, 24, False, "relu", 1],
[5, 72, 40, True, "relu", (large_stride[2], 1)],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1],
[3, 240, 80, False, "hard_swish", 1],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish", 1],
[5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish", 1],
]
cls_ch_squeeze = 960
elif model_name == "small":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', (small_stride[0], 1)],
[3, 72, 24, False, 'relu', (small_stride[1], 1)],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hard_swish', (small_stride[2], 1)],
[5, 240, 40, True, 'hard_swish', 1],
[5, 240, 40, True, 'hard_swish', 1],
[5, 120, 48, True, 'hard_swish', 1],
[5, 144, 48, True, 'hard_swish', 1],
[5, 288, 96, True, 'hard_swish', (small_stride[3], 1)],
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
[3, 16, 16, True, "relu", (small_stride[0], 1)],
[3, 72, 24, False, "relu", (small_stride[1], 1)],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1],
[5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1],
]
cls_ch_squeeze = 576
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
raise NotImplementedError(
"mode[" + model_name + "_model] is not implemented!"
)
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert scale in supported_scale, \
"supported scales are {} but input scale is {}".format(supported_scale, scale)
assert (
scale in supported_scale
), "supported scales are {} but input scale is {}".format(
supported_scale, scale
)
inplanes = 16
# conv1
......@@ -83,12 +90,13 @@ class MobileNetV3(nn.Module):
padding=1,
groups=1,
if_act=True,
act='hard_swish',
name='conv1')
act="hard_swish",
name="conv1",
)
i = 0
block_list = []
inplanes = make_divisible(inplanes * scale)
for (k, exp, c, se, nl, s) in cfg:
for k, exp, c, se, nl, s in cfg:
block_list.append(
ResidualUnit(
in_channels=inplanes,
......@@ -98,7 +106,9 @@ class MobileNetV3(nn.Module):
stride=s,
use_se=se,
act=nl,
name='conv' + str(i + 2)))
name="conv" + str(i + 2),
)
)
inplanes = make_divisible(scale * c)
i += 1
self.blocks = nn.Sequential(*block_list)
......@@ -111,8 +121,9 @@ class MobileNetV3(nn.Module):
padding=0,
groups=1,
if_act=True,
act='hard_swish',
name='conv_last')
act="hard_swish",
name="conv_last",
)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.out_channels = make_divisible(scale * cls_ch_squeeze)
......@@ -122,4 +133,4 @@ class MobileNetV3(nn.Module):
x = self.blocks(x)
x = self.conv2(x)
x = self.pool(x)
return x
\ No newline at end of file
return x
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment