Commit b3d6785d authored by myhloli

refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
parent 3cb156f5
@@ -6,10 +6,9 @@ import numpy as np
from loguru import logger
from magic_pdf.libs.config_reader import get_device
-from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import check_img, preprocess_image, sorted_boxes, \
-    merge_det_boxes, update_det_boxes, get_rotate_crop_image
-from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.tools.infer.predict_system import TextSystem
-import tools.infer.pytorchocr_utility as utility
+from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
+from .tools.infer.predict_system import TextSystem
+from .tools.infer import pytorchocr_utility as utility
import argparse
@@ -20,14 +19,9 @@ class PytorchPaddleOCR(TextSystem):
        self.lang = kwargs.get('lang', 'ch')
-        # kwargs['cls_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_mobile_v2.0_cls_infer.pth"
        if self.lang == 'ch':
            kwargs['det_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_det_infer.pth"
            kwargs['rec_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_rec_infer.pth"
-            kwargs['det_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml"
-            kwargs['rec_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml"
-            kwargs['rec_image_shape'] = '3,48,320'
        kwargs['device'] = get_device()
......
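A minimal usage sketch of the simplified class above (not part of the diff; the module path and the return shape are assumptions based on the upstream TextSystem API):

    from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR  # import path assumed
    import numpy as np

    ocr = PytorchPaddleOCR(lang='ch')              # picks the ch_ptocr_v4 det/rec weights and get_device() internally
    img = np.zeros((960, 960, 3), dtype=np.uint8)  # placeholder BGR page image
    dt_boxes, rec_res = ocr(img)                   # TextSystem.__call__ is assumed to return boxes and (text, score) pairs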
-import os, sys
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from collections import OrderedDict
-import numpy as np
-import cv2
+import os
import torch
-from pytorchocr.modeling.architectures.base_model import BaseModel
+from .modeling.architectures.base_model import BaseModel

class BaseOCRV20:
    def __init__(self, config, **kwargs):
@@ -17,45 +12,6 @@ class BaseOCRV20:
    def build_net(self, **kwargs):
        self.net = BaseModel(self.config, **kwargs)
def load_paddle_weights(self, weights_path):
raise NotImplementedError('implemented in converter.')
print('paddle weights loading...')
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
for k,v in self.net.state_dict().items():
name = k
if name.endswith('num_batches_tracked'):
continue
if name.endswith('running_mean'):
ppname = name.replace('running_mean', '_mean')
elif name.endswith('running_var'):
ppname = name.replace('running_var', '_variance')
elif name.endswith('bias') or name.endswith('weight'):
ppname = name
elif 'lstm' in name:
ppname = name
else:
print('Redundance:')
print(name)
raise ValueError
try:
if ppname.endswith('fc.weight'):
self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname].T))
else:
self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname]))
except Exception as e:
print('pytorch: {}, {}'.format(k, v.size()))
print('paddle: {}, {}'.format(ppname, para_state_dict[ppname].shape))
raise e
print('model is loaded: {}'.format(weights_path))
    def read_pytorch_weights(self, weights_path):
        if not os.path.exists(weights_path):
            raise FileNotFoundError('{} is not existed.'.format(weights_path))
@@ -74,38 +30,9 @@ class BaseOCRV20:
        print('weights is loaded.')

    def load_pytorch_weights(self, weights_path):
-        self.net.load_state_dict(torch.load(weights_path))
+        self.net.load_state_dict(torch.load(weights_path, weights_only=True))
        print('model is loaded: {}'.format(weights_path))
def save_pytorch_weights(self, weights_path):
try:
torch.save(self.net.state_dict(), weights_path, _use_new_zipfile_serialization=False)
except:
torch.save(self.net.state_dict(), weights_path) # _use_new_zipfile_serialization=False for torch>=1.6.0
print('model is saved: {}'.format(weights_path))
def print_pytorch_state_dict(self):
print('pytorch:')
for k,v in self.net.state_dict().items():
print('{}----{}'.format(k,type(v)))
def read_paddle_weights(self, weights_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
return para_state_dict, opti_state_dict
def print_paddle_state_dict(self, weights_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
print('paddle"')
for k,v in para_state_dict.items():
print('{}----{}'.format(k,type(v)))
    def inference(self, inputs):
        with torch.no_grad():
            infer = self.net(inputs)
......
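A sketch of the BaseOCRV20 surface that remains after the Paddle-specific helpers were dropped (the config contents and weight path are placeholders; the constructor wiring is assumed from the hunks above):

    base = BaseOCRV20(config)                                # config: a dict with model_type / Backbone / Neck / Head sections
    base.build_net()                                         # self.net = BaseModel(self.config)
    base.load_pytorch_weights('ch_ptocr_v4_det_infer.pth')   # now torch.load(..., weights_only=True)
    preds = base.inference(torch.randn(1, 3, 960, 960))      # runs self.net under torch.no_grad()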
@@ -18,7 +18,6 @@ import copy
# from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler
# import paddle.distributed as dist
-from pytorchocr.data.imaug import transform, create_operators
-# from pytorchocr.data.simple_dataset import SimpleDataSet
-# from pytorchocr.data.lmdb_dataset import LMDBDateSet
+from .imaug import transform, create_operators
@@ -15,7 +15,7 @@ from .operators import *
# from .east_process import *
# from .sast_process import *
-from .gen_table_mask import *
+# from .gen_table_mask import *

def transform(data, ops=None):
    """ transform """
......
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import six
import cv2
import numpy as np
class GenTableMask(object):
""" gen table mask """
def __init__(self, shrink_h_max, shrink_w_max, mask_type=0, **kwargs):
self.shrink_h_max = 5
self.shrink_w_max = 5
self.mask_type = mask_type
def projection(self, erosion, h, w, spilt_threshold=0):
# horizontal projection
projection_map = np.ones_like(erosion)
project_val_array = [0 for _ in range(0, h)]
for j in range(0, h):
for i in range(0, w):
if erosion[j, i] == 255:
project_val_array[j] += 1
# get the split points from the projection array
start_idx = 0 # index where a text region starts
end_idx = 0 # index where a blank region starts
in_text = False # whether we are currently inside a text region
box_list = []
for i in range(len(project_val_array)):
if in_text == False and project_val_array[i] > spilt_threshold: # entered a text region
in_text = True
start_idx = i
elif project_val_array[i] <= spilt_threshold and in_text == True: # entered a blank region
end_idx = i
in_text = False
if end_idx - start_idx <= 2:
continue
box_list.append((start_idx, end_idx + 1))
if in_text:
box_list.append((start_idx, h - 1))
# draw the projection histogram
for j in range(0, h):
for i in range(0, project_val_array[j]):
projection_map[j, i] = 0
return box_list, projection_map
def projection_cx(self, box_img):
box_gray_img = cv2.cvtColor(box_img, cv2.COLOR_BGR2GRAY)
h, w = box_gray_img.shape
# binarize the grayscale image
ret, thresh1 = cv2.threshold(box_gray_img, 200, 255, cv2.THRESH_BINARY_INV)
# vertical erosion
if h < w:
kernel = np.ones((2, 1), np.uint8)
erode = cv2.erode(thresh1, kernel, iterations=1)
else:
erode = thresh1
# horizontal dilation
kernel = np.ones((1, 5), np.uint8)
erosion = cv2.dilate(erode, kernel, iterations=1)
# horizontal projection
projection_map = np.ones_like(erosion)
project_val_array = [0 for _ in range(0, h)]
for j in range(0, h):
for i in range(0, w):
if erosion[j, i] == 255:
project_val_array[j] += 1
# get the split points from the projection array
start_idx = 0 # index where a text region starts
end_idx = 0 # index where a blank region starts
in_text = False # whether we are currently inside a text region
box_list = []
spilt_threshold = 0
for i in range(len(project_val_array)):
if in_text == False and project_val_array[i] > spilt_threshold: # entered a text region
in_text = True
start_idx = i
elif project_val_array[i] <= spilt_threshold and in_text == True: # entered a blank region
end_idx = i
in_text = False
if end_idx - start_idx <= 2:
continue
box_list.append((start_idx, end_idx + 1))
if in_text:
box_list.append((start_idx, h - 1))
# draw the projection histogram
for j in range(0, h):
for i in range(0, project_val_array[j]):
projection_map[j, i] = 0
split_bbox_list = []
if len(box_list) > 1:
for i, (h_start, h_end) in enumerate(box_list):
if i == 0:
h_start = 0
if i == len(box_list):
h_end = h
word_img = erosion[h_start:h_end + 1, :]
word_h, word_w = word_img.shape
w_split_list, w_projection_map = self.projection(word_img.T, word_w, word_h)
w_start, w_end = w_split_list[0][0], w_split_list[-1][1]
if h_start > 0:
h_start -= 1
h_end += 1
word_img = box_img[h_start:h_end + 1:, w_start:w_end + 1, :]
split_bbox_list.append([w_start, h_start, w_end, h_end])
else:
split_bbox_list.append([0, 0, w, h])
return split_bbox_list
def shrink_bbox(self, bbox):
left, top, right, bottom = bbox
sh_h = min(max(int((bottom - top) * 0.1), 1), self.shrink_h_max)
sh_w = min(max(int((right - left) * 0.1), 1), self.shrink_w_max)
left_new = left + sh_w
right_new = right - sh_w
top_new = top + sh_h
bottom_new = bottom - sh_h
if left_new >= right_new:
left_new = left
right_new = right
if top_new >= bottom_new:
top_new = top
bottom_new = bottom
return [left_new, top_new, right_new, bottom_new]
def __call__(self, data):
img = data['image']
cells = data['cells']
height, width = img.shape[0:2]
if self.mask_type == 1:
mask_img = np.zeros((height, width), dtype=np.float32)
else:
mask_img = np.zeros((height, width, 3), dtype=np.float32)
cell_num = len(cells)
for cno in range(cell_num):
if "bbox" in cells[cno]:
bbox = cells[cno]['bbox']
left, top, right, bottom = bbox
box_img = img[top:bottom, left:right, :].copy()
split_bbox_list = self.projection_cx(box_img)
for sno in range(len(split_bbox_list)):
split_bbox_list[sno][0] += left
split_bbox_list[sno][1] += top
split_bbox_list[sno][2] += left
split_bbox_list[sno][3] += top
for sno in range(len(split_bbox_list)):
left, top, right, bottom = split_bbox_list[sno]
left, top, right, bottom = self.shrink_bbox([left, top, right, bottom])
if self.mask_type == 1:
mask_img[top:bottom, left:right] = 1.0
data['mask_img'] = mask_img
else:
mask_img[top:bottom, left:right, :] = (255, 255, 255)
data['image'] = mask_img
return data
class ResizeTableImage(object):
def __init__(self, max_len, **kwargs):
super(ResizeTableImage, self).__init__()
self.max_len = max_len
def get_img_bbox(self, cells):
bbox_list = []
if len(cells) == 0:
return bbox_list
cell_num = len(cells)
for cno in range(cell_num):
if "bbox" in cells[cno]:
bbox = cells[cno]['bbox']
bbox_list.append(bbox)
return bbox_list
def resize_img_table(self, img, bbox_list, max_len):
height, width = img.shape[0:2]
ratio = max_len / (max(height, width) * 1.0)
resize_h = int(height * ratio)
resize_w = int(width * ratio)
img_new = cv2.resize(img, (resize_w, resize_h))
bbox_list_new = []
for bno in range(len(bbox_list)):
left, top, right, bottom = bbox_list[bno].copy()
left = int(left * ratio)
top = int(top * ratio)
right = int(right * ratio)
bottom = int(bottom * ratio)
bbox_list_new.append([left, top, right, bottom])
return img_new, bbox_list_new
def __call__(self, data):
img = data['image']
if 'cells' not in data:
cells = []
else:
cells = data['cells']
bbox_list = self.get_img_bbox(cells)
img_new, bbox_list_new = self.resize_img_table(img, bbox_list, self.max_len)
data['image'] = img_new
cell_num = len(cells)
bno = 0
for cno in range(cell_num):
if "bbox" in data['cells'][cno]:
data['cells'][cno]['bbox'] = bbox_list_new[bno]
bno += 1
data['max_len'] = self.max_len
return data
class PaddingTableImage(object):
def __init__(self, **kwargs):
super(PaddingTableImage, self).__init__()
def __call__(self, data):
img = data['image']
max_len = data['max_len']
padding_img = np.zeros((max_len, max_len, 3), dtype=np.float32)
height, width = img.shape[0:2]
padding_img[0:height, 0:width, :] = img.copy()
data['image'] = padding_img
return data
\ No newline at end of file
ch_ptocr_mobile_v2.0_cls_infer:
model_type: cls
algorithm: CLS
Transform:
Backbone:
name: MobileNetV3
scale: 0.35
model_name: small
Neck:
Head:
name: ClsHead
class_dim: 2
Multilingual_PP-OCRv3_det_infer:
model_type: det
algorithm: DB
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
disable_se: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
en_PP-OCRv3_det_infer:
model_type: det
algorithm: DB
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
disable_se: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
en_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR_LCNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 97 #'blank' + ...(62) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
ch_PP-OCRv4_det_infer:
model_type: det
algorithm: DB
Transform: null
Backbone:
name: PPLCNetV3
scale: 0.75
det: True
Neck:
name: RSEFPN
out_channels: 96
shortcut: True
Head:
name: DBHead
k: 50
ch_PP-OCRv4_det_server_infer:
model_type: det
algorithm: DB
Transform: null
Backbone:
name: PPHGNet_small
det: True
Neck:
name: LKPAN
out_channels: 256
intracl: true
Head:
name: PFHeadLocal
k: 50
mode: "large"
ch_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR_LCNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
ch_PP-OCRv4_rec_server_infer:
model_type: rec
algorithm: SVTR_HGNet
Transform:
Backbone:
name: PPHGNet_small
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
chinese_cht_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [1, 2]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 8423
fc_decay: 0.00001
latin_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 187
fc_decay: 0.00001
cyrillic_PP-OCRv3_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 165
fc_decay: 0.00001
arabic_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 164
fc_decay: 0.00001
korean_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 3690
fc_decay: 0.00001
japan_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 4401
fc_decay: 0.00001
ta_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 130
fc_decay: 0.00001
te_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 153
fc_decay: 0.00001
ka_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 155
fc_decay: 0.00001
devanagari_PP-OCRv4_rec_infer:
model_type: rec
algorithm: SVTR
Transform:
Backbone:
name: MobileNetV1Enhance
scale: 0.5
last_conv_stride: [ 1, 2 ]
last_pool_type: avg
Neck:
name: SequenceEncoder
encoder_type: svtr
dims: 64
depth: 2
hidden_dims: 120
use_guide: True
Head:
name: CTCHead
out_channels: 169
fc_decay: 0.00001
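A sketch of how one entry of this model config can feed build_model/BaseModel shown below (the file name models_config.yml, the import path, and the in_channels override are assumptions, not part of the commit):

    import yaml
    from pytorchocr.modeling.architectures import build_model   # import path assumed

    with open('models_config.yml') as f:                         # assumed file name for the YAML above
        model_configs = yaml.safe_load(f)

    cfg = dict(model_configs['ch_PP-OCRv4_det_infer'])           # model_type/algorithm/Backbone/Neck/Head as listed above
    cfg['in_channels'] = 3                                       # BaseModel falls back to config.get("in_channels", 3) anyway
    det_model = build_model(cfg)                                 # returns BaseModel(cfg): backbone -> neck -> head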
@@ -14,7 +14,7 @@
import copy

-__all__ = ['build_model']
+__all__ = ["build_model"]

def build_model(config, **kwargs):
@@ -22,4 +22,4 @@ def build_model(config, **kwargs):
    config = copy.deepcopy(config)
    module_class = BaseModel(config, **kwargs)
    return module_class
\ No newline at end of file
-import os, sys
-# import torch
-import torch.nn as nn
-# import torch.nn.functional as F
-# from pytorchocr.modeling.common import Activation
-from pytorchocr.modeling.transforms import build_transform
-from pytorchocr.modeling.backbones import build_backbone
-from pytorchocr.modeling.necks import build_neck
-from pytorchocr.modeling.heads import build_head
+from torch import nn
+from ..backbones import build_backbone
+from ..heads import build_head
+from ..necks import build_neck

class BaseModel(nn.Module):
    def __init__(self, config, **kwargs):
@@ -18,27 +14,14 @@ class BaseModel(nn.Module):
        """
        super(BaseModel, self).__init__()
-        in_channels = config.get('in_channels', 3)
-        model_type = config['model_type']
+        in_channels = config.get("in_channels", 3)
+        model_type = config["model_type"]
# build transfrom,
# for rec, transfrom can be TPS,None
# for det and cls, transfrom shoule to be None,
# if you make model differently, you can use transfrom in det and cls
if 'Transform' not in config or config['Transform'] is None:
self.use_transform = False
else:
self.use_transform = True
config['Transform']['in_channels'] = in_channels
self.transform = build_transform(config['Transform'])
in_channels = self.transform.out_channels
# raise NotImplementedError
        # build backbone, backbone is need for del, rec and cls
-        if 'Backbone' not in config or config['Backbone'] is None:
+        if "Backbone" not in config or config["Backbone"] is None:
            self.use_backbone = False
        else:
            self.use_backbone = True
-            config["Backbone"]['in_channels'] = in_channels
+            config["Backbone"]["in_channels"] = in_channels
            self.backbone = build_backbone(config["Backbone"], model_type)
            in_channels = self.backbone.out_channels
@@ -46,20 +29,20 @@ class BaseModel(nn.Module):
        # for rec, neck can be cnn,rnn or reshape(None)
        # for det, neck can be FPN, BIFPN and so on.
        # for cls, neck should be none
-        if 'Neck' not in config or config['Neck'] is None:
+        if "Neck" not in config or config["Neck"] is None:
            self.use_neck = False
        else:
            self.use_neck = True
-            config['Neck']['in_channels'] = in_channels
-            self.neck = build_neck(config['Neck'])
+            config["Neck"]["in_channels"] = in_channels
+            self.neck = build_neck(config["Neck"])
            in_channels = self.neck.out_channels

        # # build head, head is need for det, rec and cls
-        if 'Head' not in config or config['Head'] is None:
+        if "Head" not in config or config["Head"] is None:
            self.use_head = False
        else:
            self.use_head = True
-            config["Head"]['in_channels'] = in_channels
+            config["Head"]["in_channels"] = in_channels
            self.head = build_head(config["Head"], **kwargs)

        self.return_all_feats = config.get("return_all_feats", False)
@@ -70,7 +53,7 @@ class BaseModel(nn.Module):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
@@ -81,15 +64,12 @@
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.ConvTranspose2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        y = dict()
if self.use_transform:
x = self.transform(x)
        if self.use_backbone:
            x = self.backbone(x)
        if isinstance(x, dict):
@@ -107,9 +87,9 @@
        if self.use_head:
            x = self.head(x)
        # for multi head, save ctc neck out for udml
-        if isinstance(x, dict) and 'ctc_nect' in x.keys():
-            y['neck_out'] = x['ctc_neck']
-            y['head_out'] = x
+        if isinstance(x, dict) and "ctc_nect" in x.keys():
+            y["neck_out"] = x["ctc_neck"]
+            y["head_out"] = x
        elif isinstance(x, dict):
            y.update(x)
        else:
@@ -122,4 +102,4 @@
            else:
                return {final_name: x}
        else:
            return x
\ No newline at end of file
@@ -12,45 +12,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-__all__ = ['build_backbone']
+__all__ = ["build_backbone"]

def build_backbone(config, model_type):
-    if model_type == 'det':
-        from .det_mobilenet_v3 import MobileNetV3
-        from .det_resnet import ResNet
-        from .det_resnet_vd import ResNet_vd
-        from .det_resnet_vd_sast import ResNet_SAST
-        from .rec_lcnetv3 import PPLCNetV3
-        from .rec_hgnet import PPHGNet_small
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small']
-    elif model_type == 'rec' or model_type == 'cls':
-        from .rec_mobilenet_v3 import MobileNetV3
-        from .rec_resnet_vd import ResNet
-        from .rec_resnet_fpn import ResNetFPN
-        from .rec_mv1_enhance import MobileNetV1Enhance
-        from .rec_nrtr_mtb import MTB
-        from .rec_resnet_31 import ResNet31
-        from .rec_svtrnet import SVTRNet
-        from .rec_vitstr import ViTSTR
-        from .rec_densenet import DenseNet
-        from .rec_lcnetv3 import PPLCNetV3
-        from .rec_hgnet import PPHGNet_small
-        support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
-                        'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small']
-    elif model_type == 'e2e':
-        from .e2e_resnet_vd_pg import ResNet
-        support_dict = ['ResNet']
-    elif model_type == "table":
-        from .table_resnet_vd import ResNet
-        from .table_mobilenet_v3 import MobileNetV3
-        support_dict = ["ResNet", "MobileNetV3"]
+    if model_type == "det":
+        from .det_mobilenet_v3 import MobileNetV3
+        from .rec_hgnet import PPHGNet_small
+        from .rec_lcnetv3 import PPLCNetV3
+
+        support_dict = [
+            "MobileNetV3",
+            "ResNet",
+            "ResNet_vd",
+            "ResNet_SAST",
+            "PPLCNetV3",
+            "PPHGNet_small",
+        ]
+    elif model_type == "rec" or model_type == "cls":
+        from .rec_hgnet import PPHGNet_small
+        from .rec_lcnetv3 import PPLCNetV3
+        from .rec_mobilenet_v3 import MobileNetV3
+        from .rec_svtrnet import SVTRNet
+        from .rec_mv1_enhance import MobileNetV1Enhance
+
+        support_dict = [
+            "MobileNetV1Enhance",
+            "MobileNetV3",
+            "ResNet",
+            "ResNetFPN",
+            "MTB",
+            "ResNet31",
+            "SVTRNet",
+            "ViTSTR",
+            "DenseNet",
+            "PPLCNetV3",
+            "PPHGNet_small",
+        ]
    else:
        raise NotImplementedError
-    module_name = config.pop('name')
+    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
-        'when model typs is {}, backbone only support {}'.format(model_type,
-                                                                 support_dict))
+        "when model typs is {}, backbone only support {}".format(
+            model_type, support_dict
+        )
+    )
    module_class = eval(module_name)(**config)
    return module_class
\ No newline at end of file
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from torch import nn
+from ..common import Activation

def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
@@ -14,16 +13,18 @@ def make_divisible(v, divisor=8, min_value=None):

class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 if_act=True,
-                 act=None,
-                 name=None):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride,
+        padding,
+        groups=1,
+        if_act=True,
+        act=None,
+        name=None,
+    ):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.conv = nn.Conv2d(
@@ -33,11 +34,12 @@ class ConvBNLayer(nn.Module):
            stride=stride,
            padding=padding,
            groups=groups,
-            bias=False)
+            bias=False,
+        )
        self.bn = nn.BatchNorm2d(
            out_channels,
        )
        if self.if_act:
            self.act = Activation(act_type=act, inplace=True)
@@ -59,16 +61,18 @@ class SEModule(nn.Module):
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=True)
-        self.relu1 = Activation(act_type='relu', inplace=True)
+            bias=True,
+        )
+        self.relu1 = Activation(act_type="relu", inplace=True)
        self.conv2 = nn.Conv2d(
            in_channels=in_channels // reduction,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=True)
-        self.hard_sigmoid = Activation(act_type='hard_sigmoid', inplace=True)
+            bias=True,
+        )
+        self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True)

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
@@ -81,15 +85,17 @@

class ResidualUnit(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 mid_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 use_se,
-                 act=None,
-                 name=''):
+    def __init__(
+        self,
+        in_channels,
+        mid_channels,
+        out_channels,
+        kernel_size,
+        stride,
+        use_se,
+        act=None,
+        name="",
+    ):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se
@@ -102,7 +108,8 @@ class ResidualUnit(nn.Module):
            padding=0,
            if_act=True,
            act=act,
-            name=name + "_expand")
+            name=name + "_expand",
+        )
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
@@ -112,7 +119,8 @@ class ResidualUnit(nn.Module):
            groups=mid_channels,
            if_act=True,
            act=act,
-            name=name + "_depthwise")
+            name=name + "_depthwise",
+        )
        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")
        self.linear_conv = ConvBNLayer(
@@ -123,7 +131,8 @@ class ResidualUnit(nn.Module):
            padding=0,
            if_act=False,
            act=None,
-            name=name + "_linear")
+            name=name + "_linear",
+        )

    def forward(self, inputs):
        x = self.expand_conv(inputs)
@@ -137,12 +146,9 @@

class MobileNetV3(nn.Module):
-    def __init__(self,
-                 in_channels=3,
-                 model_name='large',
-                 scale=0.5,
-                 disable_se=False,
-                 **kwargs):
+    def __init__(
+        self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs
+    ):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
@@ -155,46 +161,48 @@ class MobileNetV3(nn.Module):
        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', 2],
-                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', 2],
-                [5, 120, 40, True, 'relu', 1],
-                [5, 120, 40, True, 'relu', 1],
-                [3, 240, 80, False, 'hard_swish', 2],
-                [3, 200, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 480, 112, True, 'hard_swish', 1],
-                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', 2],
-                [5, 960, 160, True, 'hard_swish', 1],
-                [5, 960, 160, True, 'hard_swish', 1],
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hard_swish", 2],
+                [3, 200, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 480, 112, True, "hard_swish", 1],
+                [3, 672, 112, True, "hard_swish", 1],
+                [5, 672, 160, True, "hard_swish", 2],
+                [5, 960, 160, True, "hard_swish", 1],
+                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
-                [3, 16, 16, True, 'relu', 2],
-                [3, 72, 24, False, 'relu', 2],
-                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', 2],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 120, 48, True, 'hard_swish', 1],
-                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', 2],
-                [5, 576, 96, True, 'hard_swish', 1],
-                [5, 576, 96, True, 'hard_swish', 1],
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hard_swish", 2],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 120, 48, True, "hard_swish", 1],
+                [5, 144, 48, True, "hard_swish", 1],
+                [5, 288, 96, True, "hard_swish", 2],
+                [5, 576, 96, True, "hard_swish", 1],
+                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
+            raise NotImplementedError(
+                "mode[" + model_name + "_model] is not implemented!"
+            )

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, scale)
+        assert (
+            scale in supported_scale
+        ), "supported scale are {} but input scale is {}".format(supported_scale, scale)
        inplanes = 16
        # conv1
        self.conv = ConvBNLayer(
@@ -205,15 +213,16 @@ class MobileNetV3(nn.Module):
            padding=1,
            groups=1,
            if_act=True,
-            act='hard_swish',
-            name='conv1')
+            act="hard_swish",
+            name="conv1",
+        )

        self.stages = nn.ModuleList()
        self.out_channels = []
        block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
-        for (k, exp, c, se, nl, s) in cfg:
+        for k, exp, c, se, nl, s in cfg:
            se = se and not self.disable_se
            if s == 2 and i > 2:
                self.out_channels.append(inplanes)
@@ -228,7 +237,9 @@ class MobileNetV3(nn.Module):
                    stride=s,
                    use_se=se,
                    act=nl,
-                    name="conv" + str(i + 2)))
+                    name="conv" + str(i + 2),
+                )
+            )
            inplanes = make_divisible(scale * c)
            i += 1
        block_list.append(
@@ -240,8 +251,10 @@ class MobileNetV3(nn.Module):
                padding=0,
                groups=1,
                if_act=True,
-                act='hard_swish',
-                name='conv_last'))
+                act="hard_swish",
+                name="conv_last",
+            )
+        )
        self.stages.append(nn.Sequential(*block_list))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
        # for i, stage in enumerate(self.stages):
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from .det_resnet_vd import DeformableConvV2, ConvBNLayer
class BottleneckBlock(nn.Module):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
is_dcn=False):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
act="relu", )
self.conv1 = ConvBNLayer(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
stride=stride,
act="relu",
is_dcn=is_dcn,
# dcn_groups=1,
)
self.conv2 = ConvBNLayer(
in_channels=num_filters,
out_channels=num_filters * 4,
kernel_size=1,
act=None, )
if not shortcut:
self.short = ConvBNLayer(
in_channels=num_channels,
out_channels=num_filters * 4,
kernel_size=1,
stride=stride, )
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=3,
stride=stride,
act="relu")
self.conv1 = ConvBNLayer(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
act=None)
if not shortcut:
self.short = ConvBNLayer(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
stride=stride)
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv1)
y = F.relu(y)
return y
class ResNet(nn.Module):
def __init__(self,
in_channels=3,
layers=50,
out_indices=None,
dcn_stage=None):
super(ResNet, self).__init__()
self.layers = layers
self.input_image_channel = in_channels
supported_layers = [18, 34, 50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.dcn_stage = dcn_stage if dcn_stage is not None else [
False, False, False, False
]
self.out_indices = out_indices if out_indices is not None else [
0, 1, 2, 3
]
self.conv = ConvBNLayer(
in_channels=self.input_image_channel,
out_channels=64,
kernel_size=7,
stride=2,
act="relu", )
self.pool2d_max = nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=1, )
self.stages = nn.ModuleList()
self.out_channels = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
block_list = nn.Sequential()
is_dcn = self.dcn_stage[block]
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
is_dcn=is_dcn)
block_list.add_module(conv_name, bottleneck_block)
shortcut = True
if block in self.out_indices:
self.out_channels.append(num_filters[block] * 4)
self.stages.append(block_list)
else:
for block in range(len(depth)):
shortcut = False
block_list = nn.Sequential()
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut)
block_list.add_module(conv_name, basic_block)
shortcut = True
if block in self.out_indices:
self.out_channels.append(num_filters[block])
self.stages.append(block_list)
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
out = []
for i, block in enumerate(self.stages):
y = block(y)
if i in self.out_indices:
out.append(y)
return out
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
import torchvision
class DeformableConvV2(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
weight_attr=None,
bias_attr=None,
lr_scale=1,
regularizer=None,
skip_quant=False,
dcn_bias_regularizer=None,
dcn_bias_lr_scale=2.):
super(DeformableConvV2, self).__init__()
self.offset_channel = 2 * kernel_size**2 * groups
self.mask_channel = kernel_size**2 * groups
if bias_attr:
# in FCOS-DCN head, specifically need learning_rate and regularizer
dcn_bias_attr = True
else:
# in ResNet backbone, do not need bias
dcn_bias_attr = False
self.conv_dcn = torchvision.ops.DeformConv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2 * dilation,
dilation=dilation,
groups=groups//2 if groups > 1 else 1,
bias=dcn_bias_attr)
self.conv_offset = nn.Conv2d(
in_channels,
groups * 3 * kernel_size**2,
kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
bias=True)
if skip_quant:
self.conv_offset.skip_quant = True
def forward(self, x):
offset_mask = self.conv_offset(x)
offset, mask = torch.split(
offset_mask,
split_size_or_sections=[self.offset_channel, self.mask_channel],
dim=1)
mask = torch.sigmoid(mask)
y = self.conv_dcn(x, offset, mask=mask)
return y
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
dcn_groups=1,
is_vd_mode=False,
act=None,
name=None,
is_dcn=False,
):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self.act = act
self._pool2d_avg = nn.AvgPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
if not is_dcn:
self._conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
else:
self._conv = DeformableConvV2(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=dcn_groups,
bias_attr=False)
self._batch_norm = nn.BatchNorm2d(
out_channels,
track_running_stats=True,
)
if act is not None:
self._act = Activation(act_type=act, inplace=True)
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act is not None:
y = self._act(y)
return y
class BottleneckBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None,
is_dcn=False,
):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b",
is_dcn=is_dcn,
dcn_groups=2,
)
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = short + conv1
y = F.relu(y)
return y
class ResNet_vd(nn.Module):
def __init__(self,
in_channels=3,
layers=50,
dcn_stage=None,
out_indices=None,
**kwargs):
super(ResNet_vd, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.dcn_stage = dcn_stage if dcn_stage is not None else [
False, False, False, False
]
self.out_indices = out_indices if out_indices is not None else [
0, 1, 2, 3
]
self.conv1_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
in_channels=32,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.stages = nn.ModuleList()
self.out_channels = []
if layers >= 50:
for block in range(len(depth)):
# block_list = []
block_list = nn.Sequential()
shortcut = False
is_dcn = self.dcn_stage[block]
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = BottleneckBlock(
in_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name,
is_dcn=is_dcn,
)
shortcut = True
block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
if block in self.out_indices:
self.out_channels.append(num_filters[block] * 4)
# self.stages.append(nn.Sequential(*block_list))
self.stages.append(block_list)
else:
for block in range(len(depth)):
# block_list = []
block_list = nn.Sequential()
shortcut = False
# is_dcn = self.dcn_stage[block]
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = BasicBlock(
in_channels=num_channels[block]
if i == 0 else num_filters[block],
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
block_list.add_module('bb_%d_%d' % (block, i), basic_block)
# block_list.append(basic_block)
if block in self.out_indices:
self.out_channels.append(num_filters[block])
self.stages.append(block_list)
# self.stages.append(nn.Sequential(*block_list))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
out = []
for i, block in enumerate(self.stages):
y = block(y)
if i in self.out_indices:
out.append(y)
return out
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
# import paddle
# from paddle import ParamAttr
# import paddle.nn as nn
# import paddle.nn.functional as F
__all__ = ["ResNet_SAST"]
class ConvBNLayer(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = nn.AvgPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = nn.BatchNorm2d(
out_channels,)
self.act = act
if act is not None:
self._act = Activation(act_type=act)
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act:
y = self._act(y)
return y
class BottleneckBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv1)
y = F.relu(y)
return y
class ResNet_SAST(nn.Module):
def __init__(self, in_channels=3, layers=50, **kwargs):
super(ResNet_SAST, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
# depth = [3, 4, 6, 3]
depth = [3, 4, 6, 3, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
# num_channels = [64, 256, 512,
# 1024] if layers >= 50 else [64, 64, 128, 256]
# num_filters = [64, 128, 256, 512]
num_channels = [64, 256, 512,
1024, 2048] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512, 512]
self.conv1_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
in_channels=32,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
act='relu',
name="conv1_3")
# self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.stages = nn.ModuleList()
self.out_channels = [3, 64]
if layers >= 50:
for block in range(len(depth)):
# block_list = []
block_list = nn.Sequential()
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = BottleneckBlock(
in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name
)
shortcut = True
# block_list.append(bottleneck_block)
block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
self.out_channels.append(num_filters[block] * 4)
# self.stages.append(nn.Sequential(*block_list))
self.stages.append(block_list)
else:
for block in range(len(depth)):
# block_list = []
block_list = nn.Sequential()
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = BasicBlock(
in_channels=num_channels[block]
if i == 0 else num_filters[block],
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
# block_list.append(basic_block)
block_list.add_module('bb_%d_%d' % (block, i), basic_block)
self.out_channels.append(num_filters[block])
# self.stages.append(nn.Sequential(*block_list))
self.stages.append(block_list)
def forward(self, inputs):
out = [inputs]
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
out.append(y)
y = self.pool2d_max(y)
for block in self.stages:
y = block(y)
out.append(y)
return out
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
__all__ = ["ResNet"]
class ConvBNLayer(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = nn.AvgPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = nn.BatchNorm2d(out_channels)
self.act = act
if self.act is not None:
self._act = Activation(act_type=self.act, inplace=True)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act is not None:
y = self._act(y)
return y
class BottleneckBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=stride,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = torch.add(short, conv1)
y = F.relu(y)
return y
class ResNet(nn.Module):
def __init__(self, in_channels=3, layers=50, **kwargs):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
# depth = [3, 4, 6, 3]
depth = [3, 4, 6, 3, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512, 1024,
2048] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512, 512]
self.conv1_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=64,
kernel_size=7,
stride=2,
act='relu',
name="conv1_1")
self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.stages = nn.ModuleList()
self.out_channels = [3, 64]
# num_filters = [64, 128, 256, 512, 512]
if layers >= 50:
for block in range(len(depth)):
block_list = nn.Sequential()
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneckBlock = BottleneckBlock(
in_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
block_list.add_module('bb_%d_%d' % (block, i), bottleneckBlock)
self.out_channels.append(num_filters[block] * 4)
self.stages.append(block_list)
else:
for block in range(len(depth)):
block_list = nn.Sequential()
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basicBlock = BasicBlock(
in_channels=num_channels[block]
if i == 0 else num_filters[block],
out_channels=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
block_list.add_module('bb_%d_%d' % (block, i), basicBlock)
self.out_channels.append(num_filters[block])
self.stages.append(block_list)
def forward(self, inputs):
out = [inputs]
y = self.conv1_1(inputs)
out.append(y)
y = self.pool2d_max(y)
for block in self.stages:
y = block(y)
out.append(y)
return out
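# Note: the backbone returns a feature pyramid rather than a single tensor —
# the raw input, the stem output, and every stage's output in order — and
# self.out_channels ([3, 64, ...]) mirrors that list entry by entry, so
# downstream necks and heads can pick the scales they need by index.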
"""
This code is refer from:
https://github.com/LBH1024/CAN/models/densenet.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
def __init__(self, nChannels, growthRate, use_dropout):
super(Bottleneck, self).__init__()
interChannels = 4 * growthRate
self.bn1 = nn.BatchNorm2d(interChannels)
self.conv1 = nn.Conv2d(
nChannels, interChannels, kernel_size=1,
bias=True) # Xavier initialization
self.bn2 = nn.BatchNorm2d(growthRate)
self.conv2 = nn.Conv2d(
interChannels, growthRate, kernel_size=3, padding=1,
bias=True) # Xavier initialization
self.use_dropout = use_dropout
self.dropout = nn.Dropout(p=0.2)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
if self.use_dropout:
out = self.dropout(out)
out = F.relu(self.bn2(self.conv2(out)))
if self.use_dropout:
out = self.dropout(out)
out = torch.cat([x, out], 1)
return out
class SingleLayer(nn.Module):
def __init__(self, nChannels, growthRate, use_dropout):
super(SingleLayer, self).__init__()
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(
nChannels, growthRate, kernel_size=3, padding=1, bias=False)
self.use_dropout = use_dropout
self.dropout = nn.Dropout(p=0.2)
def forward(self, x):
out = self.conv1(F.relu(x))
if self.use_dropout:
out = self.dropout(out)
out = torch.cat([x, out], 1)
return out
class Transition(nn.Module):
def __init__(self, nChannels, out_channels, use_dropout):
super(Transition, self).__init__()
self.bn1 = nn.BatchNorm2d(out_channels)
self.conv1 = nn.Conv2d(
nChannels, out_channels, kernel_size=1, bias=False)
self.use_dropout = use_dropout
self.dropout = nn.Dropout(p=0.2)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
if self.use_dropout:
out = self.dropout(out)
out = F.avg_pool2d(out, 2, ceil_mode=True, count_include_pad=False)
return out
class DenseNet(nn.Module):
def __init__(self, growthRate, reduction, bottleneck, use_dropout,
input_channel, **kwargs):
super(DenseNet, self).__init__()
nDenseBlocks = 16
nChannels = 2 * growthRate
self.conv1 = nn.Conv2d(
input_channel,
nChannels,
kernel_size=7,
padding=3,
stride=2,
bias=False)
self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks,
bottleneck, use_dropout)
nChannels += nDenseBlocks * growthRate
out_channels = int(math.floor(nChannels * reduction))
self.trans1 = Transition(nChannels, out_channels, use_dropout)
nChannels = out_channels
self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks,
bottleneck, use_dropout)
nChannels += nDenseBlocks * growthRate
out_channels = int(math.floor(nChannels * reduction))
self.trans2 = Transition(nChannels, out_channels, use_dropout)
nChannels = out_channels
self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks,
bottleneck, use_dropout)
self.out_channels = out_channels
def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck,
use_dropout):
layers = []
for i in range(int(nDenseBlocks)):
if bottleneck:
layers.append(Bottleneck(nChannels, growthRate, use_dropout))
else:
layers.append(SingleLayer(nChannels, growthRate, use_dropout))
nChannels += growthRate
return nn.Sequential(*layers)
def forward(self, inputs):
x, x_m, y = inputs
out = self.conv1(x)
out = F.relu(out, inplace=True)
out = F.max_pool2d(out, 2, ceil_mode=True)
out = self.dense1(out)
out = self.trans1(out)
out = self.dense2(out)
out = self.trans2(out)
out = self.dense3(out)
return out, x_m, y
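# Minimal smoke-test sketch (illustrative values; the mask and label entries are
# simply passed through unchanged, so None placeholders are enough to trace shapes):
if __name__ == "__main__":
    net = DenseNet(growthRate=24, reduction=0.5, bottleneck=True,
                   use_dropout=False, input_channel=1)
    feat, x_m, y = net((torch.zeros(1, 1, 64, 256), None, None))
    print(tuple(feat.shape))  # channel count grows with growthRate and shrinks at each Transition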
import torch import torch
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import nn
class ConvBNAct(nn.Module): class ConvBNAct(nn.Module):
def __init__(self, def __init__(
in_channels, self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True
out_channels, ):
kernel_size,
stride,
groups=1,
use_act=True):
super().__init__() super().__init__()
self.use_act = use_act self.use_act = use_act
self.conv = nn.Conv2d( self.conv = nn.Conv2d(
...@@ -20,7 +16,8 @@ class ConvBNAct(nn.Module): ...@@ -20,7 +16,8 @@ class ConvBNAct(nn.Module):
stride, stride,
padding=(kernel_size - 1) // 2, padding=(kernel_size - 1) // 2,
groups=groups, groups=groups,
bias=False) bias=False,
)
self.bn = nn.BatchNorm2d(out_channels) self.bn = nn.BatchNorm2d(out_channels)
if self.use_act: if self.use_act:
self.act = nn.ReLU() self.act = nn.ReLU()
...@@ -42,7 +39,8 @@ class ESEModule(nn.Module): ...@@ -42,7 +39,8 @@ class ESEModule(nn.Module):
out_channels=channels, out_channels=channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0,
)
self.sigmoid = nn.Sigmoid() self.sigmoid = nn.Sigmoid()
def forward(self, x): def forward(self, x):
...@@ -55,12 +53,13 @@ class ESEModule(nn.Module): ...@@ -55,12 +53,13 @@ class ESEModule(nn.Module):
class HG_Block(nn.Module): class HG_Block(nn.Module):
def __init__( def __init__(
self, self,
in_channels, in_channels,
mid_channels, mid_channels,
out_channels, out_channels,
layer_num, layer_num,
identity=False, ): identity=False,
):
super().__init__() super().__init__()
self.identity = identity self.identity = identity
...@@ -70,14 +69,18 @@ class HG_Block(nn.Module): ...@@ -70,14 +69,18 @@ class HG_Block(nn.Module):
in_channels=in_channels, in_channels=in_channels,
out_channels=mid_channels, out_channels=mid_channels,
kernel_size=3, kernel_size=3,
stride=1)) stride=1,
)
)
for _ in range(layer_num - 1): for _ in range(layer_num - 1):
self.layers.append( self.layers.append(
ConvBNAct( ConvBNAct(
in_channels=mid_channels, in_channels=mid_channels,
out_channels=mid_channels, out_channels=mid_channels,
kernel_size=3, kernel_size=3,
stride=1)) stride=1,
)
)
# feature aggregation # feature aggregation
total_channels = in_channels + layer_num * mid_channels total_channels = in_channels + layer_num * mid_channels
...@@ -85,7 +88,8 @@ class HG_Block(nn.Module): ...@@ -85,7 +88,8 @@ class HG_Block(nn.Module):
in_channels=total_channels, in_channels=total_channels,
out_channels=out_channels, out_channels=out_channels,
kernel_size=1, kernel_size=1,
stride=1) stride=1,
)
self.att = ESEModule(out_channels) self.att = ESEModule(out_channels)
def forward(self, x): def forward(self, x):
...@@ -104,14 +108,16 @@ class HG_Block(nn.Module): ...@@ -104,14 +108,16 @@ class HG_Block(nn.Module):
class HG_Stage(nn.Module): class HG_Stage(nn.Module):
def __init__(self, def __init__(
in_channels, self,
mid_channels, in_channels,
out_channels, mid_channels,
block_num, out_channels,
layer_num, block_num,
downsample=True, layer_num,
stride=[2, 1]): downsample=True,
stride=[2, 1],
):
super().__init__() super().__init__()
self.downsample = downsample self.downsample = downsample
if downsample: if downsample:
...@@ -121,24 +127,19 @@ class HG_Stage(nn.Module): ...@@ -121,24 +127,19 @@ class HG_Stage(nn.Module):
kernel_size=3, kernel_size=3,
stride=stride, stride=stride,
groups=in_channels, groups=in_channels,
use_act=False) use_act=False,
)
blocks_list = [] blocks_list = []
blocks_list.append( blocks_list.append(
HG_Block( HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False)
in_channels, )
mid_channels,
out_channels,
layer_num,
identity=False))
for _ in range(block_num - 1): for _ in range(block_num - 1):
blocks_list.append( blocks_list.append(
HG_Block( HG_Block(
out_channels, out_channels, mid_channels, out_channels, layer_num, identity=True
mid_channels, )
out_channels, )
layer_num,
identity=True))
self.blocks = nn.Sequential(*blocks_list) self.blocks = nn.Sequential(*blocks_list)
def forward(self, x): def forward(self, x):
...@@ -164,29 +165,31 @@ class PPHGNet(nn.Module): ...@@ -164,29 +165,31 @@ class PPHGNet(nn.Module):
""" """
def __init__( def __init__(
self, self,
stem_channels, stem_channels,
stage_config, stage_config,
layer_num, layer_num,
in_channels=3, in_channels=3,
det=False, det=False,
out_indices=None): out_indices=None,
):
super().__init__() super().__init__()
self.det = det self.det = det
self.out_indices = out_indices if out_indices is not None else [ self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]
0, 1, 2, 3
]
# stem # stem
stem_channels.insert(0, in_channels) stem_channels.insert(0, in_channels)
self.stem = nn.Sequential(* [ self.stem = nn.Sequential(
ConvBNAct( *[
in_channels=stem_channels[i], ConvBNAct(
out_channels=stem_channels[i + 1], in_channels=stem_channels[i],
kernel_size=3, out_channels=stem_channels[i + 1],
stride=2 if i == 0 else 1) for i in range( kernel_size=3,
len(stem_channels) - 1) stride=2 if i == 0 else 1,
]) )
for i in range(len(stem_channels) - 1)
]
)
if self.det: if self.det:
self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
...@@ -194,11 +197,25 @@ class PPHGNet(nn.Module): ...@@ -194,11 +197,25 @@ class PPHGNet(nn.Module):
self.stages = nn.ModuleList() self.stages = nn.ModuleList()
self.out_channels = [] self.out_channels = []
for block_id, k in enumerate(stage_config): for block_id, k in enumerate(stage_config):
in_channels, mid_channels, out_channels, block_num, downsample, stride = stage_config[ (
k] in_channels,
mid_channels,
out_channels,
block_num,
downsample,
stride,
) = stage_config[k]
self.stages.append( self.stages.append(
HG_Stage(in_channels, mid_channels, out_channels, block_num, HG_Stage(
layer_num, downsample, stride)) in_channels,
mid_channels,
out_channels,
block_num,
layer_num,
downsample,
stride,
)
)
if block_id in self.out_indices: if block_id in self.out_indices:
self.out_channels.append(out_channels) self.out_channels.append(out_channels)
...@@ -237,32 +254,6 @@ class PPHGNet(nn.Module): ...@@ -237,32 +254,6 @@ class PPHGNet(nn.Module):
return x return x
def PPHGNet_tiny(pretrained=False, use_ssld=False, **kwargs):
"""
PPHGNet_tiny
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPHGNet_tiny` model depends on args.
"""
stage_config = {
# in_channels, mid_channels, out_channels, blocks, downsample
"stage1": [96, 96, 224, 1, False, [2, 1]],
"stage2": [224, 128, 448, 1, True, [1, 2]],
"stage3": [448, 160, 512, 2, True, [2, 1]],
"stage4": [512, 192, 768, 1, True, [2, 1]],
}
model = PPHGNet(
stem_channels=[48, 48, 96],
stage_config=stage_config,
layer_num=5,
**kwargs)
return model
def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs): def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
""" """
PPHGNet_small PPHGNet_small
...@@ -294,31 +285,6 @@ def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs): ...@@ -294,31 +285,6 @@ def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
stage_config=stage_config_det if det else stage_config_rec, stage_config=stage_config_det if det else stage_config_rec,
layer_num=6, layer_num=6,
det=det, det=det,
**kwargs) **kwargs
return model )
def PPHGNet_base(pretrained=False, use_ssld=True, **kwargs):
"""
PPHGNet_base
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPHGNet_base` model depends on args.
"""
stage_config = {
# in_channels, mid_channels, out_channels, blocks, downsample
"stage1": [160, 192, 320, 1, False, [2, 1]],
"stage2": [320, 224, 640, 2, True, [1, 2]],
"stage3": [640, 256, 960, 3, True, [2, 1]],
"stage4": [960, 288, 1280, 2, True, [2, 1]],
}
model = PPHGNet(
stem_channels=[96, 96, 160],
stage_config=stage_config,
layer_num=7,
**kwargs)
return model return model
...@@ -12,43 +12,54 @@ ...@@ -12,43 +12,54 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import division
from __future__ import print_function
import torch import torch
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from pytorchocr.modeling.common import Activation from torch import nn
# from paddle.nn.initializer import Constant, KaimingNormal from ..common import Activation
# from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Dropout, Hardsigmoid, Hardswish, Identity, Linear, ReLU
# from paddle.regularizer import L2Decay
NET_CONFIG_det = { NET_CONFIG_det = {
"blocks2": "blocks2":
#k, in_c, out_c, s, use_se # k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]], [[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": "blocks5": [
[[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [3, 128, 256, 2, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]], [5, 256, 256, 1, False],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True], [5, 256, 256, 1, False],
[5, 512, 512, 1, False], [5, 512, 512, 1, False]] [5, 256, 256, 1, False],
[5, 256, 256, 1, False],
],
"blocks6": [
[5, 256, 512, 2, True],
[5, 512, 512, 1, True],
[5, 512, 512, 1, False],
[5, 512, 512, 1, False],
],
} }
NET_CONFIG_rec = { NET_CONFIG_rec = {
"blocks2": "blocks2":
#k, in_c, out_c, s, use_se # k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]], [[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]], "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]], "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
"blocks5": "blocks5": [
[[3, 128, 256, (1, 2), False], [5, 256, 256, 1, False], [3, 128, 256, (1, 2), False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False]], [5, 256, 256, 1, False],
"blocks6": [[5, 256, 512, (2, 1), True], [5, 512, 512, 1, True], [5, 256, 256, 1, False],
[5, 512, 512, (2, 1), False], [5, 512, 512, 1, False]] [5, 256, 256, 1, False],
[5, 256, 256, 1, False],
],
"blocks6": [
[5, 256, 512, (2, 1), True],
[5, 512, 512, 1, True],
[5, 512, 512, (2, 1), False],
[5, 512, 512, 1, False],
],
} }
...@@ -62,8 +73,7 @@ def make_divisible(v, divisor=16, min_value=None): ...@@ -62,8 +73,7 @@ def make_divisible(v, divisor=16, min_value=None):
class LearnableAffineBlock(nn.Module): class LearnableAffineBlock(nn.Module):
def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
lab_lr=0.1):
super().__init__() super().__init__()
self.scale = nn.Parameter(torch.Tensor([scale_value])) self.scale = nn.Parameter(torch.Tensor([scale_value]))
self.bias = nn.Parameter(torch.Tensor([bias_value])) self.bias = nn.Parameter(torch.Tensor([bias_value]))
...@@ -73,13 +83,9 @@ class LearnableAffineBlock(nn.Module): ...@@ -73,13 +83,9 @@ class LearnableAffineBlock(nn.Module):
class ConvBNLayer(nn.Module): class ConvBNLayer(nn.Module):
def __init__(self, def __init__(
in_channels, self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
out_channels, ):
kernel_size,
stride,
groups=1,
lr_mult=1.0):
super().__init__() super().__init__()
self.conv = nn.Conv2d( self.conv = nn.Conv2d(
in_channels=in_channels, in_channels=in_channels,
...@@ -88,7 +94,8 @@ class ConvBNLayer(nn.Module): ...@@ -88,7 +94,8 @@ class ConvBNLayer(nn.Module):
stride=stride, stride=stride,
padding=(kernel_size - 1) // 2, padding=(kernel_size - 1) // 2,
groups=groups, groups=groups,
bias=False) bias=False,
)
self.bn = nn.BatchNorm2d( self.bn = nn.BatchNorm2d(
out_channels, out_channels,
...@@ -115,15 +122,17 @@ class Act(nn.Module): ...@@ -115,15 +122,17 @@ class Act(nn.Module):
class LearnableRepLayer(nn.Module): class LearnableRepLayer(nn.Module):
def __init__(self, def __init__(
in_channels, self,
out_channels, in_channels,
kernel_size, out_channels,
stride=1, kernel_size,
groups=1, stride=1,
num_conv_branches=1, groups=1,
lr_mult=1.0, num_conv_branches=1,
lab_lr=0.1): lr_mult=1.0,
lab_lr=0.1,
):
super().__init__() super().__init__()
self.is_repped = False self.is_repped = False
self.groups = groups self.groups = groups
...@@ -134,27 +143,35 @@ class LearnableRepLayer(nn.Module): ...@@ -134,27 +143,35 @@ class LearnableRepLayer(nn.Module):
self.num_conv_branches = num_conv_branches self.num_conv_branches = num_conv_branches
self.padding = (kernel_size - 1) // 2 self.padding = (kernel_size - 1) // 2
self.identity = nn.BatchNorm2d( self.identity = (
num_features=in_channels, nn.BatchNorm2d(
) if out_channels == in_channels and stride == 1 else None num_features=in_channels,
)
if out_channels == in_channels and stride == 1
else None
)
self.conv_kxk = nn.ModuleList(
[
ConvBNLayer(
in_channels,
out_channels,
kernel_size,
stride,
groups=groups,
lr_mult=lr_mult,
)
for _ in range(self.num_conv_branches)
]
)
self.conv_kxk = nn.ModuleList([ self.conv_1x1 = (
ConvBNLayer( ConvBNLayer(
in_channels, in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
out_channels, )
kernel_size, if kernel_size > 1
stride, else None
groups=groups, )
lr_mult=lr_mult) for _ in range(self.num_conv_branches)
])
self.conv_1x1 = ConvBNLayer(
in_channels,
out_channels,
1,
stride,
groups=groups,
lr_mult=lr_mult) if kernel_size > 1 else None
self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr) self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
...@@ -192,7 +209,8 @@ class LearnableRepLayer(nn.Module): ...@@ -192,7 +209,8 @@ class LearnableRepLayer(nn.Module):
kernel_size=self.kernel_size, kernel_size=self.kernel_size,
stride=self.stride, stride=self.stride,
padding=self.padding, padding=self.padding,
groups=self.groups) groups=self.groups,
)
self.reparam_conv.weight.data = kernel self.reparam_conv.weight.data = kernel
self.reparam_conv.bias.data = bias self.reparam_conv.bias.data = bias
self.is_repped = True self.is_repped = True
...@@ -205,8 +223,9 @@ class LearnableRepLayer(nn.Module): ...@@ -205,8 +223,9 @@ class LearnableRepLayer(nn.Module):
def _get_kernel_bias(self): def _get_kernel_bias(self):
kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1) kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1, kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
self.kernel_size // 2) kernel_conv_1x1, self.kernel_size // 2
)
kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity) kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
...@@ -233,15 +252,16 @@ class LearnableRepLayer(nn.Module): ...@@ -233,15 +252,16 @@ class LearnableRepLayer(nn.Module):
eps = branch.bn._epsilon eps = branch.bn._epsilon
else: else:
assert isinstance(branch, nn.BatchNorm2d) assert isinstance(branch, nn.BatchNorm2d)
if not hasattr(self, 'id_tensor'): if not hasattr(self, "id_tensor"):
input_dim = self.in_channels // self.groups input_dim = self.in_channels // self.groups
kernel_value = torch.zeros( kernel_value = torch.zeros(
(self.in_channels, input_dim, self.kernel_size, (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
self.kernel_size), dtype=branch.weight.dtype,
dtype=branch.weight.dtype) )
for i in range(self.in_channels): for i in range(self.in_channels):
kernel_value[i, i % input_dim, self.kernel_size // 2, kernel_value[
self.kernel_size // 2] = 1 i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
] = 1
self.id_tensor = kernel_value self.id_tensor = kernel_value
kernel = self.id_tensor kernel = self.id_tensor
running_mean = branch._mean running_mean = branch._mean
...@@ -287,15 +307,17 @@ class SELayer(nn.Module): ...@@ -287,15 +307,17 @@ class SELayer(nn.Module):
class LCNetV3Block(nn.Module): class LCNetV3Block(nn.Module):
def __init__(self, def __init__(
in_channels, self,
out_channels, in_channels,
stride, out_channels,
dw_size, stride,
use_se=False, dw_size,
conv_kxk_num=4, use_se=False,
lr_mult=1.0, conv_kxk_num=4,
lab_lr=0.1): lr_mult=1.0,
lab_lr=0.1,
):
super().__init__() super().__init__()
self.use_se = use_se self.use_se = use_se
self.dw_conv = LearnableRepLayer( self.dw_conv = LearnableRepLayer(
...@@ -306,7 +328,8 @@ class LCNetV3Block(nn.Module): ...@@ -306,7 +328,8 @@ class LCNetV3Block(nn.Module):
groups=in_channels, groups=in_channels,
num_conv_branches=conv_kxk_num, num_conv_branches=conv_kxk_num,
lr_mult=lr_mult, lr_mult=lr_mult,
lab_lr=lab_lr) lab_lr=lab_lr,
)
if use_se: if use_se:
self.se = SELayer(in_channels, lr_mult=lr_mult) self.se = SELayer(in_channels, lr_mult=lr_mult)
self.pw_conv = LearnableRepLayer( self.pw_conv = LearnableRepLayer(
...@@ -316,7 +339,8 @@ class LCNetV3Block(nn.Module): ...@@ -316,7 +339,8 @@ class LCNetV3Block(nn.Module):
stride=1, stride=1,
num_conv_branches=conv_kxk_num, num_conv_branches=conv_kxk_num,
lr_mult=lr_mult, lr_mult=lr_mult,
lab_lr=lab_lr) lab_lr=lab_lr,
)
def forward(self, x): def forward(self, x):
x = self.dw_conv(x) x = self.dw_conv(x)
...@@ -327,13 +351,15 @@ class LCNetV3Block(nn.Module): ...@@ -327,13 +351,15 @@ class LCNetV3Block(nn.Module):
class PPLCNetV3(nn.Module): class PPLCNetV3(nn.Module):
def __init__(self, def __init__(
scale=1.0, self,
conv_kxk_num=4, scale=1.0,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], conv_kxk_num=4,
lab_lr=0.1, lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
det=False, lab_lr=0.1,
**kwargs): det=False,
**kwargs
):
super().__init__() super().__init__()
self.scale = scale self.scale = scale
self.lr_mult_list = lr_mult_list self.lr_mult_list = lr_mult_list
...@@ -341,90 +367,102 @@ class PPLCNetV3(nn.Module): ...@@ -341,90 +367,102 @@ class PPLCNetV3(nn.Module):
self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
assert isinstance(self.lr_mult_list, ( assert isinstance(
list, tuple self.lr_mult_list, (list, tuple)
)), "lr_mult_list should be in (list, tuple) but got {}".format( ), "lr_mult_list should be in (list, tuple) but got {}".format(
type(self.lr_mult_list)) type(self.lr_mult_list)
assert len(self.lr_mult_list )
) == 6, "lr_mult_list length should be 6 but got {}".format( assert (
len(self.lr_mult_list)) len(self.lr_mult_list) == 6
), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
in_channels=3, in_channels=3,
out_channels=make_divisible(16 * scale), out_channels=make_divisible(16 * scale),
kernel_size=3, kernel_size=3,
stride=2, stride=2,
lr_mult=self.lr_mult_list[0]) lr_mult=self.lr_mult_list[0],
)
self.blocks2 = nn.Sequential(*[
LCNetV3Block( self.blocks2 = nn.Sequential(
in_channels=make_divisible(in_c * scale), *[
out_channels=make_divisible(out_c * scale), LCNetV3Block(
dw_size=k, in_channels=make_divisible(in_c * scale),
stride=s, out_channels=make_divisible(out_c * scale),
use_se=se, dw_size=k,
conv_kxk_num=conv_kxk_num, stride=s,
lr_mult=self.lr_mult_list[1], use_se=se,
lab_lr=lab_lr) conv_kxk_num=conv_kxk_num,
for i, (k, in_c, out_c, s, se lr_mult=self.lr_mult_list[1],
) in enumerate(self.net_config["blocks2"]) lab_lr=lab_lr,
]) )
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
self.blocks3 = nn.Sequential(*[ ]
LCNetV3Block( )
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale), self.blocks3 = nn.Sequential(
dw_size=k, *[
stride=s, LCNetV3Block(
use_se=se, in_channels=make_divisible(in_c * scale),
conv_kxk_num=conv_kxk_num, out_channels=make_divisible(out_c * scale),
lr_mult=self.lr_mult_list[2], dw_size=k,
lab_lr=lab_lr) stride=s,
for i, (k, in_c, out_c, s, se use_se=se,
) in enumerate(self.net_config["blocks3"]) conv_kxk_num=conv_kxk_num,
]) lr_mult=self.lr_mult_list[2],
lab_lr=lab_lr,
self.blocks4 = nn.Sequential(*[ )
LCNetV3Block( for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
in_channels=make_divisible(in_c * scale), ]
out_channels=make_divisible(out_c * scale), )
dw_size=k,
stride=s, self.blocks4 = nn.Sequential(
use_se=se, *[
conv_kxk_num=conv_kxk_num, LCNetV3Block(
lr_mult=self.lr_mult_list[3], in_channels=make_divisible(in_c * scale),
lab_lr=lab_lr) out_channels=make_divisible(out_c * scale),
for i, (k, in_c, out_c, s, se dw_size=k,
) in enumerate(self.net_config["blocks4"]) stride=s,
]) use_se=se,
conv_kxk_num=conv_kxk_num,
self.blocks5 = nn.Sequential(*[ lr_mult=self.lr_mult_list[3],
LCNetV3Block( lab_lr=lab_lr,
in_channels=make_divisible(in_c * scale), )
out_channels=make_divisible(out_c * scale), for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
dw_size=k, ]
stride=s, )
use_se=se,
conv_kxk_num=conv_kxk_num, self.blocks5 = nn.Sequential(
lr_mult=self.lr_mult_list[4], *[
lab_lr=lab_lr) LCNetV3Block(
for i, (k, in_c, out_c, s, se in_channels=make_divisible(in_c * scale),
) in enumerate(self.net_config["blocks5"]) out_channels=make_divisible(out_c * scale),
]) dw_size=k,
stride=s,
self.blocks6 = nn.Sequential(*[ use_se=se,
LCNetV3Block( conv_kxk_num=conv_kxk_num,
in_channels=make_divisible(in_c * scale), lr_mult=self.lr_mult_list[4],
out_channels=make_divisible(out_c * scale), lab_lr=lab_lr,
dw_size=k, )
stride=s, for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
use_se=se, ]
conv_kxk_num=conv_kxk_num, )
lr_mult=self.lr_mult_list[5],
lab_lr=lab_lr) self.blocks6 = nn.Sequential(
for i, (k, in_c, out_c, s, se *[
) in enumerate(self.net_config["blocks6"]) LCNetV3Block(
]) in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[5],
lab_lr=lab_lr,
)
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
]
)
self.out_channels = make_divisible(512 * scale) self.out_channels = make_divisible(512 * scale)
if self.det: if self.det:
...@@ -436,15 +474,19 @@ class PPLCNetV3(nn.Module): ...@@ -436,15 +474,19 @@ class PPLCNetV3(nn.Module):
make_divisible(self.net_config["blocks6"][-1][2] * scale), make_divisible(self.net_config["blocks6"][-1][2] * scale),
] ]
self.layer_list = nn.ModuleList([ self.layer_list = nn.ModuleList(
nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0), [
nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0), nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0), nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0) nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
]) nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
]
)
self.out_channels = [ self.out_channels = [
int(mv_c[0] * scale), int(mv_c[1] * scale), int(mv_c[0] * scale),
int(mv_c[2] * scale), int(mv_c[3] * scale) int(mv_c[1] * scale),
int(mv_c[2] * scale),
int(mv_c[3] * scale),
] ]
def forward(self, x): def forward(self, x):
......
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
NET_CONFIG_det = {
"blocks2":
# k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5":
[[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True],
[5, 512, 512, 1, False], [5, 512, 512, 1, False]]
}
NET_CONFIG_rec = {
"blocks2":
# k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
"blocks5":
[[3, 128, 256, (1, 2), False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, (2, 1), True], [5, 512, 512, 1, True],
[5, 512, 512, (2, 1), False], [5, 512, 512, 1, False]]
}
def make_divisible(v, divisor=16, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
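# Worked example: widths are snapped to a multiple of the divisor (16 by default)
# without dropping below 90% of the requested value, e.g.
#   make_divisible(16 * 0.95)  -> 16   (15.2 is lifted to the 16 minimum)
#   make_divisible(512 * 0.95) -> 480  (486.4 snaps down to 480, still >= 0.9 * 486.4)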
class LearnableAffineBlock(nn.Module):
def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0,
lab_lr=0.1):
super().__init__()
self.scale = nn.Parameter(torch.Tensor([scale_value]))
self.bias = nn.Parameter(torch.Tensor([bias_value]))
def forward(self, x):
return self.scale * x + self.bias
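# With the default scale_value=1.0 and bias_value=0.0 this starts as an identity
# mapping; the two learnable scalars then rescale and shift the activation during
# training (the lr_mult/lab_lr arguments are accepted but unused in this port).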
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups=1,
lr_mult=1.0):
super().__init__()
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Act(nn.Module):
def __init__(self, act="hard_swish", lr_mult=1.0, lab_lr=0.1):
super().__init__()
assert act in ['hard_swish', 'relu']
self.act = Activation(act)
self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
def forward(self, x):
return self.lab(self.act(x))
class LearnableRepLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
num_conv_branches=1,
lr_mult=1.0,
lab_lr=0.1):
super().__init__()
self.is_repped = False
self.groups = groups
self.stride = stride
self.kernel_size = kernel_size
self.in_channels = in_channels
self.out_channels = out_channels
self.num_conv_branches = num_conv_branches
self.padding = (kernel_size - 1) // 2
self.identity = nn.BatchNorm2d(in_channels) if out_channels == in_channels and stride == 1 else None
self.conv_kxk = nn.ModuleList([
ConvBNLayer(
in_channels,
out_channels,
kernel_size,
stride,
groups=groups,
lr_mult=lr_mult) for _ in range(self.num_conv_branches)
])
self.conv_1x1 = ConvBNLayer(
in_channels,
out_channels,
1,
stride,
groups=groups,
lr_mult=lr_mult) if kernel_size > 1 else None
self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
def forward(self, x):
# for export
if self.is_repped:
out = self.lab(self.reparam_conv(x))
if self.stride != 2:
out = self.act(out)
return out
out = 0
if self.identity is not None:
out += self.identity(x)
if self.conv_1x1 is not None:
out += self.conv_1x1(x)
for conv in self.conv_kxk:
out += conv(x)
out = self.lab(out)
if self.stride != 2:
out = self.act(out)
return out
def rep(self):
if self.is_repped:
return
kernel, bias = self._get_kernel_bias()
self.reparam_conv = nn.Conv2d(
in_channels=self.in_channels,
out_channels=self.out_channels,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
groups=self.groups)
self.reparam_conv.weight.data = kernel
self.reparam_conv.bias.data = bias
self.is_repped = True
def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
if not isinstance(kernel1x1, torch.Tensor):
return 0
else:
return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])
def _get_kernel_bias(self):
kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1,
self.kernel_size // 2)
kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
kernel_conv_kxk = 0
bias_conv_kxk = 0
for conv in self.conv_kxk:
kernel, bias = self._fuse_bn_tensor(conv)
kernel_conv_kxk += kernel
bias_conv_kxk += bias
kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
return kernel_reparam, bias_reparam
def _fuse_bn_tensor(self, branch):
if not branch:
return 0, 0
elif isinstance(branch, ConvBNLayer):
kernel = branch.conv.weight
running_mean = branch.bn.running_mean
running_var = branch.bn.running_var
gamma = branch.bn.weight
beta = branch.bn.bias
eps = branch.bn.eps
else:
assert isinstance(branch, nn.BatchNorm2d)
if not hasattr(self, 'id_tensor'):
input_dim = self.in_channels // self.groups
kernel_value = torch.zeros(
(self.in_channels, input_dim, self.kernel_size,
self.kernel_size),
dtype=branch.weight.dtype)
for i in range(self.in_channels):
kernel_value[i, i % input_dim, self.kernel_size // 2,
self.kernel_size // 2] = 1
self.id_tensor = kernel_value
kernel = self.id_tensor
running_mean = branch.running_mean
running_var = branch.running_var
gamma = branch.weight
beta = branch.bias
eps = branch.eps
std = (running_var + eps).sqrt()
t = (gamma / std).reshape((-1, 1, 1, 1))
return kernel * t, beta - running_mean * gamma / std
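# Sketch of the re-parameterization contract (illustrative sizes): in eval mode,
# calling rep() folds the identity BN, the 1x1 branch, and the k x k branches into
# a single conv whose output matches the multi-branch forward up to float error.
if __name__ == "__main__":
    layer = LearnableRepLayer(16, 16, kernel_size=3, stride=1, num_conv_branches=4).eval()
    x = torch.randn(1, 16, 8, 8)
    with torch.no_grad():
        before = layer(x)
        layer.rep()
        after = layer(x)
    print(torch.allclose(before, after, atol=1e-5))  # expected: True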
class SELayer(nn.Module):
def __init__(self, channel, reduction=4, lr_mult=1.0):
super().__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.conv1 = nn.Conv2d(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2d(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = Activation('hard_sigmoid')
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = x * identity
return x
class LCNetV3Block(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
dw_size,
use_se=False,
conv_kxk_num=4,
lr_mult=1.0,
lab_lr=0.1):
super().__init__()
self.use_se = use_se
self.dw_conv = LearnableRepLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=dw_size,
stride=stride,
groups=in_channels,
num_conv_branches=conv_kxk_num,
lr_mult=lr_mult,
lab_lr=lab_lr)
if use_se:
self.se = SELayer(in_channels, lr_mult=lr_mult)
self.pw_conv = LearnableRepLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
num_conv_branches=conv_kxk_num,
lr_mult=lr_mult,
lab_lr=lab_lr)
def forward(self, x):
x = self.dw_conv(x)
if self.use_se:
x = self.se(x)
x = self.pw_conv(x)
return x
class PPLCNetV3(nn.Module):
def __init__(self,
scale=1.0,
conv_kxk_num=4,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
lab_lr=0.1,
det=False,
**kwargs):
super().__init__()
self.scale = scale
self.lr_mult_list = lr_mult_list
self.det = det
self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
assert isinstance(self.lr_mult_list, (
list, tuple
)), "lr_mult_list should be in (list, tuple) but got {}".format(
type(self.lr_mult_list))
assert len(self.lr_mult_list
) == 6, "lr_mult_list length should be 6 but got {}".format(
len(self.lr_mult_list))
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=make_divisible(16 * scale),
kernel_size=3,
stride=2,
lr_mult=self.lr_mult_list[0])
self.blocks2 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[1],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks2"])
])
self.blocks3 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[2],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks3"])
])
self.blocks4 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[3],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks4"])
])
self.blocks5 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[4],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks5"])
])
self.blocks6 = nn.Sequential(*[
LCNetV3Block(
in_channels=make_divisible(in_c * scale),
out_channels=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se,
conv_kxk_num=conv_kxk_num,
lr_mult=self.lr_mult_list[5],
lab_lr=lab_lr)
for i, (k, in_c, out_c, s, se
) in enumerate(self.net_config["blocks6"])
])
self.out_channels = make_divisible(512 * scale)
if self.det:
mv_c = [16, 24, 56, 480]
self.out_channels = [
make_divisible(self.net_config["blocks3"][-1][2] * scale),
make_divisible(self.net_config["blocks4"][-1][2] * scale),
make_divisible(self.net_config["blocks5"][-1][2] * scale),
make_divisible(self.net_config["blocks6"][-1][2] * scale),
]
self.layer_list = nn.ModuleList([
nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0)
])
self.out_channels = [
int(mv_c[0] * scale), int(mv_c[1] * scale),
int(mv_c[2] * scale), int(mv_c[3] * scale)
]
def forward(self, x):
out_list = []
x = self.conv1(x)
x = self.blocks2(x)
x = self.blocks3(x)
out_list.append(x)
x = self.blocks4(x)
out_list.append(x)
x = self.blocks5(x)
out_list.append(x)
x = self.blocks6(x)
out_list.append(x)
if self.det:
out_list[0] = self.layer_list[0](out_list[0])
out_list[1] = self.layer_list[1](out_list[1])
out_list[2] = self.layer_list[2](out_list[2])
out_list[3] = self.layer_list[3](out_list[3])
return out_list
if self.training:
x = F.adaptive_avg_pool2d(x, [1, 40])
else:
x = F.avg_pool2d(x, [3, 2])
return x
\ No newline at end of file
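# Minimal smoke test (illustrative input size; rec configuration, CPU): for a
# 48 x 320 crop the eval-mode pooling reduces the final map to a 1 x 40 grid.
if __name__ == "__main__":
    backbone = PPLCNetV3(scale=0.95, det=False).eval()
    with torch.no_grad():
        feat = backbone(torch.zeros(1, 3, 48, 320))
    print(tuple(feat.shape), backbone.out_channels)  # e.g. (1, 480, 1, 40) 480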
import os, sys from torch import nn
import torch
import torch.nn as nn from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
from .det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible
class MobileNetV3(nn.Module): class MobileNetV3(nn.Module):
def __init__(self, def __init__(
in_channels=3, self,
model_name='small', in_channels=3,
scale=0.5, model_name="small",
large_stride=None, scale=0.5,
small_stride=None, large_stride=None,
**kwargs): small_stride=None,
**kwargs
):
super(MobileNetV3, self).__init__() super(MobileNetV3, self).__init__()
if small_stride is None: if small_stride is None:
small_stride = [2, 2, 2, 2] small_stride = [2, 2, 2, 2]
if large_stride is None: if large_stride is None:
large_stride = [1, 2, 2, 2] large_stride = [1, 2, 2, 2]
assert isinstance(large_stride, list), "large_stride type must " \ assert isinstance(
"be list but got {}".format(type(large_stride)) large_stride, list
assert isinstance(small_stride, list), "small_stride type must " \ ), "large_stride type must " "be list but got {}".format(type(large_stride))
"be list but got {}".format(type(small_stride)) assert isinstance(
assert len(large_stride) == 4, "large_stride length must be " \ small_stride, list
"4 but got {}".format(len(large_stride)) ), "small_stride type must " "be list but got {}".format(type(small_stride))
assert len(small_stride) == 4, "small_stride length must be " \ assert (
"4 but got {}".format(len(small_stride)) len(large_stride) == 4
), "large_stride length must be " "4 but got {}".format(len(large_stride))
assert (
len(small_stride) == 4
), "small_stride length must be " "4 but got {}".format(len(small_stride))
if model_name == "large": if model_name == "large":
cfg = [ cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', large_stride[0]], [3, 16, 16, False, "relu", large_stride[0]],
[3, 64, 24, False, 'relu', (large_stride[1], 1)], [3, 64, 24, False, "relu", (large_stride[1], 1)],
[3, 72, 24, False, 'relu', 1], [3, 72, 24, False, "relu", 1],
[5, 72, 40, True, 'relu', (large_stride[2], 1)], [5, 72, 40, True, "relu", (large_stride[2], 1)],
[5, 120, 40, True, 'relu', 1], [5, 120, 40, True, "relu", 1],
[5, 120, 40, True, 'relu', 1], [5, 120, 40, True, "relu", 1],
[3, 240, 80, False, 'hard_swish', 1], [3, 240, 80, False, "hard_swish", 1],
[3, 200, 80, False, 'hard_swish', 1], [3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, 'hard_swish', 1], [3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, 'hard_swish', 1], [3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, 'hard_swish', 1], [3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, 'hard_swish', 1], [3, 672, 112, True, "hard_swish", 1],
[5, 672, 160, True, 'hard_swish', (large_stride[3], 1)], [5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
[5, 960, 160, True, 'hard_swish', 1], [5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, 'hard_swish', 1], [5, 960, 160, True, "hard_swish", 1],
] ]
cls_ch_squeeze = 960 cls_ch_squeeze = 960
elif model_name == "small": elif model_name == "small":
cfg = [ cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', (small_stride[0], 1)], [3, 16, 16, True, "relu", (small_stride[0], 1)],
[3, 72, 24, False, 'relu', (small_stride[1], 1)], [3, 72, 24, False, "relu", (small_stride[1], 1)],
[3, 88, 24, False, 'relu', 1], [3, 88, 24, False, "relu", 1],
[5, 96, 40, True, 'hard_swish', (small_stride[2], 1)], [5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
[5, 240, 40, True, 'hard_swish', 1], [5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, 'hard_swish', 1], [5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, 'hard_swish', 1], [5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, 'hard_swish', 1], [5, 144, 48, True, "hard_swish", 1],
[5, 288, 96, True, 'hard_swish', (small_stride[3], 1)], [5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
[5, 576, 96, True, 'hard_swish', 1], [5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, 'hard_swish', 1], [5, 576, 96, True, "hard_swish", 1],
] ]
cls_ch_squeeze = 576 cls_ch_squeeze = 576
else: else:
raise NotImplementedError("mode[" + model_name + raise NotImplementedError(
"_model] is not implemented!") "mode[" + model_name + "_model] is not implemented!"
)
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert scale in supported_scale, \ assert (
"supported scales are {} but input scale is {}".format(supported_scale, scale) scale in supported_scale
), "supported scales are {} but input scale is {}".format(
supported_scale, scale
)
inplanes = 16 inplanes = 16
# conv1 # conv1
...@@ -83,12 +90,13 @@ class MobileNetV3(nn.Module): ...@@ -83,12 +90,13 @@ class MobileNetV3(nn.Module):
padding=1, padding=1,
groups=1, groups=1,
if_act=True, if_act=True,
act='hard_swish', act="hard_swish",
name='conv1') name="conv1",
)
i = 0 i = 0
block_list = [] block_list = []
inplanes = make_divisible(inplanes * scale) inplanes = make_divisible(inplanes * scale)
for (k, exp, c, se, nl, s) in cfg: for k, exp, c, se, nl, s in cfg:
block_list.append( block_list.append(
ResidualUnit( ResidualUnit(
in_channels=inplanes, in_channels=inplanes,
...@@ -98,7 +106,9 @@ class MobileNetV3(nn.Module): ...@@ -98,7 +106,9 @@ class MobileNetV3(nn.Module):
stride=s, stride=s,
use_se=se, use_se=se,
act=nl, act=nl,
name='conv' + str(i + 2))) name="conv" + str(i + 2),
)
)
inplanes = make_divisible(scale * c) inplanes = make_divisible(scale * c)
i += 1 i += 1
self.blocks = nn.Sequential(*block_list) self.blocks = nn.Sequential(*block_list)
...@@ -111,8 +121,9 @@ class MobileNetV3(nn.Module): ...@@ -111,8 +121,9 @@ class MobileNetV3(nn.Module):
padding=0, padding=0,
groups=1, groups=1,
if_act=True, if_act=True,
act='hard_swish', act="hard_swish",
name='conv_last') name="conv_last",
)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.out_channels = make_divisible(scale * cls_ch_squeeze) self.out_channels = make_divisible(scale * cls_ch_squeeze)
...@@ -122,4 +133,4 @@ class MobileNetV3(nn.Module): ...@@ -122,4 +133,4 @@ class MobileNetV3(nn.Module):
x = self.blocks(x) x = self.blocks(x)
x = self.conv2(x) x = self.conv2(x)
x = self.pool(x) x = self.pool(x)
return x return x
\ No newline at end of file