Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
4824c25b
Commit
4824c25b
authored
Jul 04, 2024
by
wangsen
Browse files
Initial commit
parents
Changes
396
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1981 additions
and
0 deletions
+1981
-0
benchmark/PaddleOCR_DBNet/config/open_dataset.yaml
benchmark/PaddleOCR_DBNet/config/open_dataset.yaml
+74
-0
benchmark/PaddleOCR_DBNet/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml
...t/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml
+86
-0
benchmark/PaddleOCR_DBNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml
...BNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml
+86
-0
benchmark/PaddleOCR_DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml
...DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml
+93
-0
benchmark/PaddleOCR_DBNet/data_loader/__init__.py
benchmark/PaddleOCR_DBNet/data_loader/__init__.py
+106
-0
benchmark/PaddleOCR_DBNet/data_loader/dataset.py
benchmark/PaddleOCR_DBNet/data_loader/dataset.py
+181
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py
benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py
+8
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
+304
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py
benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py
+71
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/make_border_map.py
...rk/PaddleOCR_DBNet/data_loader/modules/make_border_map.py
+143
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py
...rk/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py
+133
-0
benchmark/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py
...k/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py
+206
-0
benchmark/PaddleOCR_DBNet/environment.yml
benchmark/PaddleOCR_DBNet/environment.yml
+21
-0
benchmark/PaddleOCR_DBNet/eval.sh
benchmark/PaddleOCR_DBNet/eval.sh
+2
-0
benchmark/PaddleOCR_DBNet/generate_lists.sh
benchmark/PaddleOCR_DBNet/generate_lists.sh
+17
-0
benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg
benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg
+0
-0
benchmark/PaddleOCR_DBNet/models/__init__.py
benchmark/PaddleOCR_DBNet/models/__init__.py
+20
-0
benchmark/PaddleOCR_DBNet/models/backbone/__init__.py
benchmark/PaddleOCR_DBNet/models/backbone/__init__.py
+18
-0
benchmark/PaddleOCR_DBNet/models/backbone/resnet.py
benchmark/PaddleOCR_DBNet/models/backbone/resnet.py
+375
-0
benchmark/PaddleOCR_DBNet/models/basic.py
benchmark/PaddleOCR_DBNet/models/basic.py
+37
-0
No files found.
Too many changes to show.
To preserve performance only
396 of 396+
files are displayed.
Plain diff
Email patch
benchmark/PaddleOCR_DBNet/config/open_dataset.yaml
0 → 100644
View file @
4824c25b
name: DBNet
dataset:
  train:
    dataset:
      type: DetDataset # dataset class name, resolved in data_loader/dataset.py
      args:
        data_path: # a list file with lines of the form "img_path \t gt_path"
          - ''
        pre_processes: # data preprocessing: augmentation plus label-map generation
          - type: IaaAugment # apply imgaug transforms
            args:
              - { 'type': Fliplr, 'args': { 'p': 0.5 } }
              - { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
              - { 'type': Resize, 'args': { 'size': [0.5, 3] } }
          - type: EastRandomCropData
            args:
              size: [640, 640]
              max_tries: 50
              keep_ratio: true
          - type: MakeBorderMap
            args:
              shrink_ratio: 0.4
              thresh_min: 0.3
              thresh_max: 0.7
          - type: MakeShrinkMap
            args:
              shrink_ratio: 0.4
              min_text_size: 8
        transforms: # per-image tensor transforms
          - type: ToTensor
            args: {}
          - type: Normalize
            args:
              mean: [0.485, 0.456, 0.406]
              std: [0.229, 0.224, 0.225]
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
        # keys removed from the data dict before it is returned
        filter_keys: [img_path, img_name, text_polys, texts, ignore_tags, shape]
        ignore_tags: ['*', '###']
    loader:
      batch_size: 1
      shuffle: true
      num_workers: 0
      collate_fn: ''
  validate:
    dataset:
      type: DetDataset
      args:
        data_path:
          - ''
        pre_processes:
          - type: ResizeShortSize
            args:
              short_size: 736
              resize_text_polys: false
        transforms:
          - type: ToTensor
            args: {}
          - type: Normalize
            args:
              mean: [0.485, 0.456, 0.406]
              std: [0.229, 0.224, 0.225]
        img_mode: RGB
        load_char_annotation: false # whether to load character-level annotations
        expand_one_char: false # whether to widen single-char boxes: w = w + h
        filter_keys: []
        ignore_tags: ['*', '###']
    loader:
      batch_size: 1
      shuffle: true
      num_workers: 0
      collate_fn: ICDARCollectFN
\ No newline at end of file
benchmark/PaddleOCR_DBNet/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml
0 → 100644
View file @
4824c25b
name: DBNet
base: ['config/open_dataset.yaml'] # inherits dataset defaults from the base config
arch:
  type: Model
  backbone:
    # NOTE(review): the filename says dcn_resnet50 but the backbone here is
    # deformable_resnet18 — confirm which is intended
    type: deformable_resnet18
    pretrained: true
  neck:
    type: FPN
    inner_channels: 256
  head:
    type: DBHead
    out_channels: 2
    k: 50
post_processing:
  type: SegDetectorRepresenter
  args:
    thresh: 0.3
    box_thresh: 0.7
    max_candidates: 1000
    unclip_ratio: 1.5 # from paper
metric:
  type: QuadMetric
  args:
    is_output_polygon: false
loss:
  type: DBLoss
  alpha: 1
  beta: 10
  ohem_ratio: 3
optimizer:
  type: Adam
  args:
    lr: 0.001
    weight_decay: 0
    amsgrad: true
lr_scheduler:
  type: WarmupPolyLR
  args:
    warmup_epoch: 3
trainer:
  seed: 2
  epochs: 1200
  log_iter: 1
  show_images_iter: 1
  resume_checkpoint: ''
  finetune_checkpoint: ''
  output_dir: output
  visual_dl: false
amp:
  scale_loss: 1024
  amp_level: O2
  custom_white_list: []
  custom_black_list: ['exp', 'sigmoid', 'concat']
dataset:
  train:
    dataset:
      args:
        data_path:
          - ./datasets/train.json
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 2
      shuffle: true
      num_workers: 6
      collate_fn: ''
  validate:
    dataset:
      args:
        data_path:
          - ./datasets/test.json
        pre_processes:
          - type: ResizeShortSize
            args:
              short_size: 736
              resize_text_polys: false
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 1
      shuffle: true
      num_workers: 6
      collate_fn: ICDARCollectFN
benchmark/PaddleOCR_DBNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml
0 → 100644
View file @
4824c25b
name: DBNet
base: ['config/open_dataset.yaml'] # inherits dataset defaults from the base config
arch:
  type: Model
  backbone:
    type: resnest50
    pretrained: true
  neck:
    type: FPN
    inner_channels: 256
  head:
    type: DBHead
    out_channels: 2
    k: 50
post_processing:
  type: SegDetectorRepresenter
  args:
    thresh: 0.3
    box_thresh: 0.7
    max_candidates: 1000
    unclip_ratio: 1.5 # from paper
metric:
  type: QuadMetric
  args:
    is_output_polygon: false
loss:
  type: DBLoss
  alpha: 1
  beta: 10
  ohem_ratio: 3
optimizer:
  type: Adam
  args:
    lr: 0.001
    weight_decay: 0
    amsgrad: true
lr_scheduler:
  type: WarmupPolyLR
  args:
    warmup_epoch: 3
trainer:
  seed: 2
  epochs: 1200
  log_iter: 1
  show_images_iter: 1
  resume_checkpoint: ''
  finetune_checkpoint: ''
  output_dir: output
  visual_dl: false
amp:
  scale_loss: 1024
  amp_level: O2
  custom_white_list: []
  custom_black_list: ['exp', 'sigmoid', 'concat']
dataset:
  train:
    dataset:
      args:
        data_path:
          - ./datasets/train.json
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 2
      shuffle: true
      num_workers: 6
      collate_fn: ''
  validate:
    dataset:
      args:
        data_path:
          - ./datasets/test.json
        pre_processes:
          - type: ResizeShortSize
            args:
              short_size: 736
              resize_text_polys: false
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 1
      shuffle: true
      num_workers: 6
      collate_fn: ICDARCollectFN
benchmark/PaddleOCR_DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml
0 → 100644
View file @
4824c25b
name: DBNet
base: ['config/open_dataset.yaml'] # inherits dataset defaults from the base config
arch:
  type: Model
  backbone:
    type: resnet18
    pretrained: true
  neck:
    type: FPN
    inner_channels: 256
  head:
    type: DBHead
    out_channels: 2
    k: 50
post_processing:
  type: SegDetectorRepresenter
  args:
    thresh: 0.3
    box_thresh: 0.7
    max_candidates: 1000
    unclip_ratio: 1.5 # from paper
metric:
  type: QuadMetric
  args:
    is_output_polygon: false
loss:
  type: DBLoss
  alpha: 1
  beta: 10
  ohem_ratio: 3
optimizer:
  type: Adam
  args:
    lr: 0.001
    weight_decay: 0
    amsgrad: true
lr_scheduler:
  type: WarmupPolyLR
  args:
    warmup_epoch: 3
trainer:
  seed: 2
  epochs: 1200
  log_iter: 1
  show_images_iter: 1
  resume_checkpoint: ''
  finetune_checkpoint: ''
  output_dir: output
  visual_dl: false
amp:
  scale_loss: 1024
  amp_level: O2
  custom_white_list: []
  custom_black_list: ['exp', 'sigmoid', 'concat']
dataset:
  train:
    dataset:
      args:
        data_path:
          - ./datasets/train.json
        transforms: # per-image tensor transforms
          - type: ToTensor
            args: {}
          - type: Normalize
            args:
              mean: [0.485, 0.456, 0.406]
              std: [0.229, 0.224, 0.225]
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 2
      shuffle: true
      num_workers: 6
      collate_fn: ''
  validate:
    dataset:
      args:
        data_path:
          - ./datasets/test.json
        pre_processes:
          - type: ResizeShortSize
            args:
              short_size: 736
              resize_text_polys: false
        img_mode: RGB
        load_char_annotation: false
        expand_one_char: false
    loader:
      batch_size: 1
      shuffle: true
      num_workers: 6
      collate_fn: ICDARCollectFN
benchmark/PaddleOCR_DBNet/data_loader/__init__.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:52
# @Author : zhoujun
import
copy
import
PIL
import
numpy
as
np
import
paddle
from
paddle.io
import
DataLoader
,
DistributedBatchSampler
,
BatchSampler
from
paddle.vision
import
transforms
def get_dataset(data_path, module_name, transform, dataset_args):
    """Instantiate a training dataset from this package's ``dataset`` module.

    :param data_path: list of dataset list files, each line "path/to/img \t label"
    :param module_name: name of the dataset class to instantiate (e.g. ``DetDataset``)
    :param transform: transforms applied by the dataset
    :param dataset_args: extra keyword arguments forwarded to the dataset class
    :return: the constructed dataset instance
    """
    from . import dataset

    dataset_cls = getattr(dataset, module_name)
    return dataset_cls(transform=transform, data_path=data_path, **dataset_args)
def get_transforms(transforms_config):
    """Build a ``paddle.vision`` transform pipeline from config.

    :param transforms_config: list of ``{'type': name, 'args': kwargs}`` dicts;
        ``args`` is optional and defaults to no keyword arguments
    :return: a ``transforms.Compose`` chaining the configured transforms
    """
    ops = [
        getattr(transforms, cfg['type'])(**(cfg['args'] if 'args' in cfg else {}))
        for cfg in transforms_config
    ]
    return transforms.Compose(ops)
class ICDARCollectFN:
    """Collate function that groups sample dicts by key and stacks array-like values.

    Values that are numpy arrays, paddle tensors, or PIL images are stacked along
    a new batch axis; everything else is collected into plain lists.
    """

    def __init__(self, *args, **kwargs):
        pass

    def __call__(self, batch):
        data_dict = {}
        stackable_keys = []
        for sample in batch:
            for key, value in sample.items():
                data_dict.setdefault(key, [])
                is_arraylike = isinstance(
                    value, (np.ndarray, paddle.Tensor, PIL.Image.Image))
                if is_arraylike and key not in stackable_keys:
                    stackable_keys.append(key)
                data_dict[key].append(value)
        # stack only the keys whose values are tensor-like
        for key in stackable_keys:
            data_dict[key] = paddle.stack(data_dict[key], 0)
        return data_dict
def get_dataloader(module_config, distributed=False):
    """Build a ``paddle.io.DataLoader`` from a config dict.

    :param module_config: dict with a ``dataset`` section (``type``/``args``) and a
        ``loader`` section (``batch_size``, ``shuffle``, ``num_workers``, ``collate_fn``)
    :param distributed: when True, use a ``DistributedBatchSampler``
    :return: a DataLoader, or None when the config or its data_path is empty
    """
    if module_config is None:
        return None
    config = copy.deepcopy(module_config)
    dataset_args = config['dataset']['args']
    if 'transforms' in dataset_args:
        img_transforms = get_transforms(dataset_args.pop('transforms'))
    else:
        img_transforms = None
    # create the dataset
    dataset_name = config['dataset']['type']
    data_path = dataset_args.pop('data_path')
    if data_path is None:  # bug fix: was `== None`
        return None

    data_path = [x for x in data_path if x is not None]
    if len(data_path) == 0:
        return None

    loader_cfg = config['loader']
    if ('collate_fn' not in loader_cfg or loader_cfg['collate_fn'] is None or
            len(loader_cfg['collate_fn']) == 0):
        loader_cfg['collate_fn'] = None
    else:
        # NOTE(review): eval() of a config string is unsafe if the config file is
        # untrusted; a registry/globals lookup would be safer.
        loader_cfg['collate_fn'] = eval(loader_cfg['collate_fn'])()

    _dataset = get_dataset(
        data_path=data_path,
        module_name=dataset_name,
        transform=img_transforms,
        dataset_args=dataset_args)
    if distributed:
        # use a distributed sampler so each rank sees a disjoint shard
        batch_sampler = DistributedBatchSampler(
            dataset=_dataset,
            batch_size=loader_cfg.pop('batch_size'),
            shuffle=loader_cfg.pop('shuffle'))
    else:
        batch_sampler = BatchSampler(
            dataset=_dataset,
            batch_size=loader_cfg.pop('batch_size'),
            shuffle=loader_cfg.pop('shuffle'))
    loader = DataLoader(
        dataset=_dataset, batch_sampler=batch_sampler, **loader_cfg)
    return loader
benchmark/PaddleOCR_DBNet/data_loader/dataset.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:54
# @Author : zhoujun
import
pathlib
import
os
import
cv2
import
numpy
as
np
import
scipy.io
as
sio
from
tqdm.auto
import
tqdm
from
base
import
BaseDataSet
from
utils
import
order_points_clockwise
,
get_datalist
,
load
,
expand_polygon
class ICDAR2015Dataset(BaseDataSet):
    """ICDAR2015-style detection dataset, driven by "img_path \t gt_path" list files."""

    def __init__(self,
                 data_path: str,
                 img_mode,
                 pre_processes,
                 filter_keys,
                 ignore_tags,
                 transform=None,
                 **kwargs):
        super().__init__(data_path, img_mode, pre_processes, filter_keys,
                         ignore_tags, transform)

    def load_data(self, data_path: str) -> list:
        """Read the list file(s) and parse every label file.

        :param data_path: list file(s) mapping image paths to label paths
        :return: list of dicts with img_path/img_name plus parsed annotation fields
        """
        data_list = get_datalist(data_path)
        t_data_list = []
        for img_path, label_path in data_list:
            data = self._get_annotation(label_path)
            if len(data['text_polys']) > 0:
                item = {
                    'img_path': img_path,
                    'img_name': pathlib.Path(img_path).stem
                }
                item.update(data)
                t_data_list.append(item)
            else:
                print('there is no suit bbox in {}'.format(label_path))
        return t_data_list

    def _get_annotation(self, label_path: str) -> dict:
        """Parse one ICDAR gt file: 8 comma-separated coords, then the transcript.

        :param label_path: path to the gt text file
        :return: dict with 'text_polys' (N,4,2 array), 'texts', 'ignore_tags'
        """
        boxes = []
        texts = []
        ignores = []
        with open(label_path, encoding='utf-8', mode='r') as f:
            for line in f.readlines():
                # strip BOM markers some gt files carry
                params = line.strip().strip('\ufeff').strip('\xef\xbb\xbf').split(',')
                try:
                    box = order_points_clockwise(
                        np.array(list(map(float, params[:8]))).reshape(-1, 2))
                    if cv2.contourArea(box) > 0:
                        boxes.append(box)
                        label = params[8]
                        texts.append(label)
                        # boxes whose transcript is an ignore tag (e.g. '###') get masked
                        ignores.append(label in self.ignore_tags)
                except Exception:
                    # bug fix: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit
                    print('load label failed on {}'.format(label_path))
        data = {
            'text_polys': np.array(boxes),
            'texts': texts,
            'ignore_tags': ignores,
        }
        return data
class DetDataset(BaseDataSet):
    """Detection dataset driven by json annotation files (see ``load_data``)."""

    def __init__(self,
                 data_path: str,
                 img_mode,
                 pre_processes,
                 filter_keys,
                 ignore_tags,
                 transform=None,
                 **kwargs):
        # whether to also load character-level polygons from each annotation's 'chars'
        self.load_char_annotation = kwargs['load_char_annotation']
        # whether to widen boxes via expand_polygon (config docs: w = w + h)
        self.expand_one_char = kwargs['expand_one_char']
        super().__init__(data_path, img_mode, pre_processes, filter_keys,
                         ignore_tags, transform)

    def load_data(self, data_path: str) -> list:
        """
        Read text-line polygons and transcripts (and optionally per-character
        polygons) from json annotation files.

        :param data_path: list of json paths; each json holds 'data_root' and a
            'data_list' of entries with 'img_name' and 'annotations'
        :return: list of dicts with img_path/img_name/text_polys/texts/ignore_tags
        """
        data_list = []
        for path in data_path:
            content = load(path)
            for gt in tqdm(
                    content['data_list'], desc='read file {}'.format(path)):
                img_path = os.path.join(content['data_root'], gt['img_name'])
                polygons = []
                texts = []
                illegibility_list = []
                language_list = []
                for annotation in gt['annotations']:
                    # skip annotations missing either geometry or transcript
                    if len(annotation['polygon']) == 0 or len(annotation['text']) == 0:
                        continue
                    # NOTE(review): condition is len(text) > 1, i.e. multi-char boxes
                    # are expanded despite the option's name — confirm intent
                    if len(annotation['text']) > 1 and self.expand_one_char:
                        annotation['polygon'] = expand_polygon(annotation['polygon'])
                    polygons.append(annotation['polygon'])
                    texts.append(annotation['text'])
                    illegibility_list.append(annotation['illegibility'])
                    language_list.append(annotation['language'])
                    if self.load_char_annotation:
                        for char_annotation in annotation['chars']:
                            if len(char_annotation['polygon']) == 0 or len(
                                    char_annotation['char']) == 0:
                                continue
                            polygons.append(char_annotation['polygon'])
                            texts.append(char_annotation['char'])
                            illegibility_list.append(char_annotation['illegibility'])
                            language_list.append(char_annotation['language'])
                data_list.append({
                    'img_path': img_path,
                    'img_name': gt['img_name'],
                    'text_polys': np.array(polygons),
                    'texts': texts,
                    'ignore_tags': illegibility_list
                })
        return data_list
class SynthTextDataset(BaseDataSet):
    """SynthText dataset: word boxes and transcripts are read from a gt.mat file."""

    def __init__(self,
                 data_path: str,
                 img_mode,
                 pre_processes,
                 filter_keys,
                 transform=None,
                 **kwargs):
        self.transform = transform
        self.dataRoot = pathlib.Path(data_path)
        if not self.dataRoot.exists():
            raise FileNotFoundError('Dataset folder is not exist.')
        self.targetFilePath = self.dataRoot / 'gt.mat'
        if not self.targetFilePath.exists():
            raise FileExistsError('Target file is not exist.')
        targets = {}
        # load only the three variables we need from the (large) .mat file
        sio.loadmat(
            self.targetFilePath,
            targets,
            squeeze_me=True,
            struct_as_record=False,
            variable_names=['imnames', 'wordBB', 'txt'])
        self.imageNames = targets['imnames']
        self.wordBBoxes = targets['wordBB']
        self.transcripts = targets['txt']
        # NOTE(review): the base init is called without ignore_tags here, yet
        # load_data reads self.ignore_tags — presumably the base class provides a
        # default; confirm against BaseDataSet
        super().__init__(data_path, img_mode, pre_processes, filter_keys,
                         transform)

    def load_data(self, data_path: str) -> list:
        """Convert the loaded .mat arrays into the common per-image item dicts."""
        t_data_list = []
        for imageName, wordBBoxes, texts in zip(self.imageNames,
                                                self.wordBBoxes,
                                                self.transcripts):
            item = {}
            # single-word images come squeezed to 2-D; restore the word axis
            wordBBoxes = np.expand_dims(
                wordBBoxes, axis=2) if (wordBBoxes.ndim == 2) else wordBBoxes
            _, _, numOfWords = wordBBoxes.shape
            text_polys = wordBBoxes.reshape(
                [8, numOfWords], order='F').T  # num_words * 8
            text_polys = text_polys.reshape(numOfWords, 4,
                                            2)  # num_of_words * 4 * 2
            # flatten the per-line transcripts into one word list
            transcripts = [word for line in texts for word in line.split()]
            if numOfWords != len(transcripts):
                continue  # skip images whose box count and word count disagree
            item['img_path'] = str(self.dataRoot / imageName)
            item['img_name'] = (self.dataRoot / imageName).stem
            item['text_polys'] = text_polys
            item['texts'] = transcripts
            item['ignore_tags'] = [x in self.ignore_tags for x in transcripts]
            t_data_list.append(item)
        return t_data_list
benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/12/4 10:53
# @Author : zhoujun
from
.iaa_augment
import
IaaAugment
from
.augment
import
*
from
.random_crop_data
import
EastRandomCropData
,
PSERandomCrop
from
.make_border_map
import
MakeBorderMap
from
.make_shrink_map
import
MakeShrinkMap
benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:52
# @Author : zhoujun
import
math
import
numbers
import
random
import
cv2
import
numpy
as
np
from
skimage.util
import
random_noise
class RandomNoise:
    """Randomly add gaussian noise to the image."""

    def __init__(self, random_rate):
        """
        :param random_rate: probability of applying the noise
        """
        self.random_rate = random_rate

    def __call__(self, data: dict):
        """
        Add gaussian noise to the image.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        im = data['img']
        # bug fix: the original read `im.dtype` without ever defining `im`,
        # raising NameError whenever the noise branch was taken
        data['img'] = (random_noise(im, mode='gaussian', clip=True) *
                       255).astype(im.dtype)
        return data
class RandomScale:
    """Randomly rescale the image and its text polygons by one of ``scales``."""

    def __init__(self, scales, random_rate):
        """
        :param scales: candidate scale factors
        :param random_rate: probability of applying the transform
        """
        self.random_rate = random_rate
        self.scales = scales

    def __call__(self, data: dict) -> dict:
        """
        Pick a scale at random from ``self.scales`` and resize image and boxes.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        chosen = float(np.random.choice(self.scales))
        scaled_polys = data['text_polys'].copy()
        scaled_polys *= chosen
        data['img'] = cv2.resize(data['img'], dsize=None, fx=chosen, fy=chosen)
        data['text_polys'] = scaled_polys
        return data
class RandomRotateImgBox:
    """Randomly rotate the image and remap its text boxes accordingly."""

    def __init__(self, degrees, random_rate, same_size=False):
        """
        :param degrees: rotation range; a single number d means (-d, d), otherwise
            a length-2 sequence (list/tuple/ndarray)
        :param random_rate: probability of applying the transform
        :param same_size: keep the output canvas the same size as the input
        """
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError(
                    "If degrees is a single number, it must be positive.")
            degrees = (-degrees, degrees)
        elif isinstance(degrees, list) or isinstance(
                degrees, tuple) or isinstance(degrees, np.ndarray):
            if len(degrees) != 2:
                raise ValueError(
                    "If degrees is a sequence, it must be of len 2.")
            degrees = degrees
        else:
            raise Exception(
                'degrees must in Number or list or tuple or np.ndarray')
        self.degrees = degrees
        self.same_size = same_size
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Rotate image and text boxes by a random angle drawn from ``degrees``.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        im = data['img']
        text_polys = data['text_polys']
        # ---------------------- rotate the image ----------------------
        w = im.shape[1]
        h = im.shape[0]
        angle = np.random.uniform(self.degrees[0], self.degrees[1])
        if self.same_size:
            nw = w
            nh = h
        else:
            # degrees to radians
            rangle = np.deg2rad(angle)
            # size of the bounding canvas that fits the rotated image
            nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w))
            nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w))
        # affine matrix rotating about the new canvas center
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
        # translation taking the old image center to the new canvas center
        rot_move = np.dot(rot_mat,
                          np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # fold the translation into the affine matrix
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # apply the affine transform
        rot_img = cv2.warpAffine(
            im,
            rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)
        # ---------------------- remap the bbox corners ----------------------
        # rot_mat is the final affine matrix; map each polygon's four corners
        # into the rotated frame via homogeneous coordinates
        rot_text_polys = list()
        for bbox in text_polys:
            point1 = np.dot(rot_mat, np.array([bbox[0, 0], bbox[0, 1], 1]))
            point2 = np.dot(rot_mat, np.array([bbox[1, 0], bbox[1, 1], 1]))
            point3 = np.dot(rot_mat, np.array([bbox[2, 0], bbox[2, 1], 1]))
            point4 = np.dot(rot_mat, np.array([bbox[3, 0], bbox[3, 1], 1]))
            rot_text_polys.append([point1, point2, point3, point4])
        data['img'] = rot_img
        data['text_polys'] = np.array(rot_text_polys)
        return data
class RandomResize:
    """Randomly resize the image (and text boxes) to a fixed target size."""

    def __init__(self, size, random_rate, keep_ratio=False):
        """
        :param size: target size; a single number s means (s, s), otherwise a
            length-2 sequence [w, h]
        :param random_rate: probability of applying the transform
        :param keep_ratio: zero-pad the image up to the target size first so the
            aspect ratio survives the resize
        """
        if isinstance(size, numbers.Number):
            if size < 0:
                raise ValueError(
                    "If input_size is a single number, it must be positive.")
            size = (size, size)
        elif isinstance(size, list) or isinstance(
                size, tuple) or isinstance(size, np.ndarray):
            if len(size) != 2:
                raise ValueError(
                    "If input_size is a sequence, it must be of len 2.")
            size = (size[0], size[1])
        else:
            raise Exception(
                'input_size must in Number or list or tuple or np.ndarray')
        self.size = size
        self.keep_ratio = keep_ratio
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Resize image and text boxes to ``self.size``.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        im = data['img']
        text_polys = data['text_polys']
        if self.keep_ratio:
            # pad the short side so the later resize preserves aspect ratio
            # NOTE(review): padding uses size[0] against h and size[1] against w,
            # while cv2.resize(im, self.size) treats size as (w, h) — confirm
            # the intended axis order
            h, w, c = im.shape
            max_h = max(h, self.size[0])
            max_w = max(w, self.size[1])
            im_padded = np.zeros((max_h, max_w, c), dtype=np.uint8)
            im_padded[:h, :w] = im.copy()
            im = im_padded
        text_polys = text_polys.astype(np.float32)
        h, w, _ = im.shape
        im = cv2.resize(im, self.size)
        w_scale = self.size[0] / float(w)
        h_scale = self.size[1] / float(h)
        text_polys[:, :, 0] *= w_scale
        text_polys[:, :, 1] *= h_scale
        data['img'] = im
        data['text_polys'] = text_polys
        return data
def resize_image(img, short_size):
    """Resize so the short side equals ``short_size``; round both sides to
    multiples of 32 (the network's stride).

    :param img: HWC image array
    :param short_size: target length for the shorter side
    :return: (resized_img, (w_ratio, h_ratio))
    """
    height, width, _ = img.shape
    if height < width:
        target_h = short_size
        target_w = target_h / height * width
    else:
        target_w = short_size
        target_h = target_w / width * height
    # snap both dimensions to the nearest multiple of 32
    target_h = int(round(target_h / 32) * 32)
    target_w = int(round(target_w / 32) * 32)
    resized_img = cv2.resize(img, (target_w, target_h))
    return resized_img, (target_w / width, target_h / height)
class ResizeShortSize:
    """Upscale the image so its short side is at least ``short_size``."""

    def __init__(self, short_size, resize_text_polys=True):
        """
        :param short_size: minimum length for the image's shorter side
        :param resize_text_polys: also scale the text polygons
        """
        self.short_size = short_size
        self.resize_text_polys = resize_text_polys

    def __call__(self, data: dict) -> dict:
        """
        Resize image (and optionally text boxes); no-op when the short side is
        already long enough.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        im = data['img']
        text_polys = data['text_polys']

        h, w, _ = im.shape
        short_edge = min(h, w)
        if short_edge < self.short_size:
            # guarantee short side >= short_size
            scale = self.short_size / short_edge
            im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
            scale = (scale, scale)
            # im, scale = resize_image(im, self.short_size)
            if self.resize_text_polys:
                # text_polys *= scale
                # NOTE(review): for (N, 4, 2) polygons, [:, 0]/[:, 1] scale only
                # the first vertex of each box — [:, :, 0]/[:, :, 1] may be
                # intended (compare RandomResize); confirm
                text_polys[:, 0] *= scale[0]
                text_polys[:, 1] *= scale[1]

        data['img'] = im
        data['text_polys'] = text_polys
        return data
class HorizontalFlip:
    """Mirror the image left-right and remap the text boxes."""

    def __init__(self, random_rate):
        """
        :param random_rate: probability of applying the flip
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip image and text boxes horizontally.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        flipped = cv2.flip(data['img'], 1)
        height, width, _ = flipped.shape
        mirrored_polys = data['text_polys'].copy()
        # x' = width - x mirrors every vertex about the vertical axis
        mirrored_polys[:, :, 0] = width - mirrored_polys[:, :, 0]
        data['img'] = flipped
        data['text_polys'] = mirrored_polys
        return data
class VerticallFlip:
    """Mirror the image top-bottom and remap the text boxes."""

    def __init__(self, random_rate):
        """
        :param random_rate: probability of applying the flip
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip image and text boxes vertically.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the (possibly modified) data dict
        """
        if random.random() > self.random_rate:
            return data
        flipped = cv2.flip(data['img'], 0)
        height, width, _ = flipped.shape
        mirrored_polys = data['text_polys'].copy()
        # y' = height - y mirrors every vertex about the horizontal axis
        mirrored_polys[:, :, 1] = height - mirrored_polys[:, :, 1]
        data['img'] = flipped
        data['text_polys'] = mirrored_polys
        return data
benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/12/4 18:06
# @Author : zhoujun
import
numpy
as
np
import
imgaug
import
imgaug.augmenters
as
iaa
class AugmenterBuilder(object):
    """Recursively build an imgaug augmenter pipeline from nested config."""

    def __init__(self):
        pass

    def build(self, args, root=True):
        """Turn config into imgaug objects.

        Accepted shapes:
          * None or empty -> None
          * a list at the root -> ``iaa.Sequential`` of its built items
          * a non-root list -> positional form ``[AugmenterName, arg, ...]``
          * a dict -> ``{'type': AugmenterName, 'args': {kwargs}}``

        :param args: the config fragment to build
        :param root: whether this call is the pipeline root
        """
        if args is None or len(args) == 0:
            return None
        elif isinstance(args, list):
            if root:
                sequence = [self.build(value, root=False) for value in args]
                return iaa.Sequential(sequence)
            else:
                return getattr(iaa, args[0])(
                    *[self.to_tuple_if_list(a) for a in args[1:]])
        elif isinstance(args, dict):
            cls = getattr(iaa, args['type'])
            return cls(**{
                k: self.to_tuple_if_list(v)
                for k, v in args['args'].items()
            })
        else:
            raise RuntimeError('unknown augmenter arg: ' + str(args))

    def to_tuple_if_list(self, obj):
        # configs carry ranges as lists; imgaug expects tuples
        if isinstance(obj, list):
            return tuple(obj)
        return obj
class IaaAugment():
    """Apply a configured imgaug pipeline to the image and its polygons."""

    def __init__(self, augmenter_args):
        # augmenter_args follows the AugmenterBuilder config protocol
        self.augmenter_args = augmenter_args
        self.augmenter = AugmenterBuilder().build(self.augmenter_args)

    def __call__(self, data):
        image = data['img']
        shape = image.shape

        if self.augmenter:
            # a deterministic copy guarantees image and keypoints receive
            # the exact same random transform
            aug = self.augmenter.to_deterministic()
            data['img'] = aug.augment_image(image)
            data = self.may_augment_annotation(aug, data, shape)
        return data

    def may_augment_annotation(self, aug, data, shape):
        """Transform every polygon in data['text_polys'] with ``aug``."""
        if aug is None:
            return data

        line_polys = []
        for poly in data['text_polys']:
            new_poly = self.may_augment_poly(aug, shape, poly)
            line_polys.append(new_poly)
        data['text_polys'] = np.array(line_polys)
        return data

    def may_augment_poly(self, aug, img_shape, poly):
        """Transform one polygon by running its vertices through imgaug keypoints."""
        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        keypoints = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(
                keypoints, shape=img_shape)])[0].keypoints
        poly = [(p.x, p.y) for p in keypoints]
        return poly
benchmark/PaddleOCR_DBNet/data_loader/modules/make_border_map.py
0 → 100644
View file @
4824c25b
import
cv2
import
numpy
as
np
np
.
seterr
(
divide
=
'ignore'
,
invalid
=
'ignore'
)
import
pyclipper
from
shapely.geometry
import
Polygon
class MakeBorderMap():
    """Generate the DB threshold (border) map and its mask for the text polygons."""

    def __init__(self, shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7):
        self.shrink_ratio = shrink_ratio
        # final map values are rescaled from [0, 1] into [thresh_min, thresh_max]
        self.thresh_min = thresh_min
        self.thresh_max = thresh_max

    def __call__(self, data: dict) -> dict:
        """
        Build 'threshold_map' and 'threshold_mask' from the text polygons.
        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: data with the two new keys added
        """
        im = data['img']
        text_polys = data['text_polys']
        ignore_tags = data['ignore_tags']

        canvas = np.zeros(im.shape[:2], dtype=np.float32)
        mask = np.zeros(im.shape[:2], dtype=np.float32)

        for i in range(len(text_polys)):
            if ignore_tags[i]:
                continue
            self.draw_border_map(text_polys[i], canvas, mask=mask)
        # rescale the [0, 1] border map into [thresh_min, thresh_max]
        canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min

        data['threshold_map'] = canvas
        data['threshold_mask'] = mask
        return data

    def draw_border_map(self, polygon, canvas, mask):
        """Draw one polygon's distance-based border map onto ``canvas``/``mask``."""
        polygon = np.array(polygon)
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        if polygon_shape.area <= 0:
            return
        # offset distance from the DB paper: D = A * (1 - r^2) / L
        distance = polygon_shape.area * (
            1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
        subject = [tuple(l) for l in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)

        # dilate the polygon outward by `distance`
        padded_polygon = np.array(padding.Execute(distance)[0])
        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

        xmin = padded_polygon[:, 0].min()
        xmax = padded_polygon[:, 0].max()
        ymin = padded_polygon[:, 1].min()
        ymax = padded_polygon[:, 1].max()
        width = xmax - xmin + 1
        height = ymax - ymin + 1

        # shift the polygon into the padded bounding-box frame
        polygon[:, 0] = polygon[:, 0] - xmin
        polygon[:, 1] = polygon[:, 1] - ymin

        # per-pixel x/y coordinate grids over the padded box
        xs = np.broadcast_to(
            np.linspace(
                0, width - 1, num=width).reshape(1, width), (height, width))
        ys = np.broadcast_to(
            np.linspace(
                0, height - 1, num=height).reshape(height, 1), (height, width))

        distance_map = np.zeros(
            (polygon.shape[0], height, width), dtype=np.float32)
        for i in range(polygon.shape[0]):
            j = (i + 1) % polygon.shape[0]
            # distance from every pixel to polygon edge (i, j),
            # normalized by the offset distance and clipped to [0, 1]
            absolute_distance = self.distance(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
        # keep the distance to the nearest edge
        distance_map = distance_map.min(axis=0)

        # clamp the padded box to the canvas and keep the max with what is drawn
        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                             xmin_valid - xmin:xmax_valid - xmax + width],
            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])

    def distance(self, xs, ys, point_1, point_2):
        '''
        compute the distance from point to a line
        ys: coordinates in the first axis
        xs: coordinates in the second axis
        point_1, point_2: (x, y), the end of the line
        '''
        height, width = xs.shape[:2]
        square_distance_1 = np.square(xs - point_1[0]) + np.square(
            ys - point_1[1])
        square_distance_2 = np.square(xs - point_2[0]) + np.square(
            ys - point_2[1])
        square_distance = np.square(point_1[0] - point_2[0]) + np.square(
            point_1[1] - point_2[1])

        # law of cosines gives the cosine of the angle at the pixel
        cosin = (square_distance - square_distance_1 - square_distance_2) / (
            2 * np.sqrt(square_distance_1 * square_distance_2))
        square_sin = 1 - np.square(cosin)
        square_sin = np.nan_to_num(square_sin)
        # perpendicular distance to the infinite line through both endpoints
        result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
                         square_distance)

        # obtuse angle means the foot is outside the segment:
        # fall back to the nearest-endpoint distance there
        result[cosin < 0] = np.sqrt(
            np.fmin(square_distance_1, square_distance_2))[cosin < 0]
        # self.extend_line(point_1, point_2, result)
        return result

    def extend_line(self, point_1, point_2, result):
        """Draw the segment extended past each endpoint by (1 + shrink_ratio)."""
        ex_point_1 = (int(
            round(point_1[0] + (point_1[0] - point_2[0]) * (
                1 + self.shrink_ratio))), int(
                    round(point_1[1] + (point_1[1] - point_2[1]) * (
                        1 + self.shrink_ratio))))
        cv2.line(
            result,
            tuple(ex_point_1),
            tuple(point_1),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0)
        ex_point_2 = (int(
            round(point_2[0] + (point_2[0] - point_1[0]) * (
                1 + self.shrink_ratio))), int(
                    round(point_2[1] + (point_2[1] - point_1[1]) * (
                        1 + self.shrink_ratio))))
        cv2.line(
            result,
            tuple(ex_point_2),
            tuple(point_2),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0)
        return ex_point_1, ex_point_2
benchmark/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py
0 → 100644
View file @
4824c25b
import
numpy
as
np
import
cv2
def shrink_polygon_py(polygon, shrink_ratio):
    """
    Shrink a polygon towards its centroid by `shrink_ratio`.

    Calling the function again on the result with 1/shrink_ratio restores
    the original polygon.

    :param polygon: (N, 2) numpy array of vertices
    :param shrink_ratio: scale factor applied to each vertex's offset from
        the centroid
    :return: a new (N, 2) array; the input is left untouched
    """
    # Bug fix: work on a copy so the caller's array is not mutated in place
    # (the pyclipper variant below already leaves its input untouched).
    polygon = polygon.copy()
    cx = polygon[:, 0].mean()
    cy = polygon[:, 1].mean()
    polygon[:, 0] = cx + (polygon[:, 0] - cx) * shrink_ratio
    polygon[:, 1] = cy + (polygon[:, 1] - cy) * shrink_ratio
    return polygon
def shrink_polygon_pyclipper(polygon, shrink_ratio):
    """Shrink a polygon inward with a pyclipper polygon offset.

    The offset distance is derived from the polygon's area and perimeter
    (the DB formula), so the shrink adapts to the polygon's size. Returns
    an (N, 2) array, or an empty array when the polygon collapses.
    """
    from shapely.geometry import Polygon
    import pyclipper

    shape = Polygon(polygon)
    offset_dist = shape.area * (1 - np.power(shrink_ratio, 2)) / shape.length

    clipper = pyclipper.PyclipperOffset()
    clipper.AddPath([tuple(pt) for pt in polygon], pyclipper.JT_ROUND,
                    pyclipper.ET_CLOSEDPOLYGON)
    paths = clipper.Execute(-offset_dist)

    if paths:
        return np.array(paths[0]).reshape(-1, 2)
    # Polygon collapsed entirely under the negative offset.
    return np.array(paths)
class MakeShrinkMap():
    r'''
    Making binary mask from detection data with ICDAR format.
    Typically following the process of class `MakeICDARData`.
    '''

    def __init__(self, min_text_size=8, shrink_ratio=0.4, shrink_type='pyclipper'):
        # min_text_size: polygons whose shorter side is below this are ignored
        # shrink_ratio: how strongly polygons are shrunk towards their interior
        # shrink_type: 'py' (centroid scaling) or 'pyclipper' (polygon offset)
        shrink_func_dict = {
            'py': shrink_polygon_py,
            'pyclipper': shrink_polygon_pyclipper
        }
        self.shrink_func = shrink_func_dict[shrink_type]
        self.min_text_size = min_text_size
        self.shrink_ratio = shrink_ratio

    def __call__(self, data: dict) -> dict:
        """
        Build the shrink map (gt) and its training mask from the text polygons.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'shrink_map' and 'shrink_mask' added
        """
        image = data['img']
        text_polys = data['text_polys']
        ignore_tags = data['ignore_tags']

        h, w = image.shape[:2]
        text_polys, ignore_tags = self.validate_polygons(text_polys,
                                                         ignore_tags, h, w)
        gt = np.zeros((h, w), dtype=np.float32)
        mask = np.ones((h, w), dtype=np.float32)
        for i in range(len(text_polys)):
            polygon = text_polys[i]
            height = max(polygon[:, 1]) - min(polygon[:, 1])
            width = max(polygon[:, 0]) - min(polygon[:, 0])
            if ignore_tags[i] or min(height, width) < self.min_text_size:
                # Flagged or too small: zero the mask so the region is
                # excluded from the loss.
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
            else:
                shrinked = self.shrink_func(polygon, self.shrink_ratio)
                if shrinked.size == 0:
                    # Shrinking collapsed the polygon: treat it as ignored.
                    cv2.fillPoly(mask,
                                 polygon.astype(np.int32)[np.newaxis, :, :], 0)
                    ignore_tags[i] = True
                    continue
                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)

        data['shrink_map'] = gt
        data['shrink_mask'] = mask
        return data

    def validate_polygons(self, polygons, ignore_tags, h, w):
        '''
        Clip polygons to the image bounds and normalize their orientation.

        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
        '''
        if len(polygons) == 0:
            return polygons, ignore_tags
        assert len(polygons) == len(ignore_tags)
        for polygon in polygons:
            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

        for i in range(len(polygons)):
            area = self.polygon_area(polygons[i])
            if abs(area) < 1:
                # Degenerate (near-zero-area) polygon: keep it but ignore it.
                ignore_tags[i] = True
            # NOTE(review): cv2.contourArea returns a non-negative value by
            # default, so `area > 0` holds for every non-degenerate polygon
            # and this reversal always fires; a signed area (see the
            # commented-out shoelace code below) looks intended — confirm.
            if area > 0:
                polygons[i] = polygons[i][::-1, :]
        return polygons, ignore_tags

    def polygon_area(self, polygon):
        # Polygon area as computed by OpenCV (non-negative by default).
        return cv2.contourArea(polygon)
        # edge = 0
        # for i in range(polygon.shape[0]):
        #     next_index = (i + 1) % polygon.shape[0]
        #     edge += (polygon[next_index, 0] - polygon[i, 0]) * (polygon[next_index, 1] - polygon[i, 1])
        #
        # return edge / 2.
if __name__ == '__main__':
    # Smoke test: shrink a quadrilateral with both implementations, then
    # re-expand the pyclipper result and report its minimum-area box.
    from shapely.geometry import Polygon
    import pyclipper

    quad = np.array([[0, 0], [100, 10], [100, 100], [10, 90]])

    shrunk_py = shrink_polygon_py(quad, 0.4)
    print(shrunk_py)
    # Shrinking back by the inverse ratio should restore the polygon.
    print(shrink_polygon_py(shrunk_py, 1 / 0.4))

    shrunk_clip = shrink_polygon_pyclipper(quad, 0.4)
    print(shrunk_clip)

    shape = Polygon(shrunk_clip)
    expand_dist = shape.area * 1.5 / shape.length
    grower = pyclipper.PyclipperOffset()
    grower.AddPath(shrunk_clip, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    grown = np.array(grower.Execute(expand_dist))
    rect = cv2.minAreaRect(grown)
    corners = cv2.boxPoints(rect)
    print(corners)
benchmark/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py
0 → 100644
View file @
4824c25b
import
random
import
cv2
import
numpy
as
np
# random crop algorithm similar to https://github.com/argman/EAST
class EastRandomCropData():
    """Random crop that avoids cutting through any non-ignored text polygon,
    then resizes (and optionally pads) the crop to `size`."""

    def __init__(self,
                 size=(640, 640),
                 max_tries=50,
                 min_crop_side_ratio=0.1,
                 require_original_image=False,
                 keep_ratio=True):
        # size: output (width, height)
        # max_tries: sampling attempts before falling back to the full image
        # min_crop_side_ratio: minimum crop side as a fraction of the image side
        # require_original_image: stored but not referenced in this class
        # keep_ratio: pad with zeros instead of stretching when ratios differ
        self.size = size
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.require_original_image = require_original_image
        self.keep_ratio = keep_ratio

    def __call__(self, data: dict) -> dict:
        """
        Crop the image around the text and keep only boxes inside the crop.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with cropped/rescaled entries
        """
        im = data['img']
        text_polys = data['text_polys']
        ignore_tags = data['ignore_tags']
        texts = data['texts']
        # Polygons that must not be cut (not flagged as ignored).
        all_care_polys = [
            text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
        ]
        # Pick the crop window.
        crop_x, crop_y, crop_w, crop_h = self.crop_area(im, all_care_polys)
        # Scale so the crop fits inside the target size.
        scale_w = self.size[0] / crop_w
        scale_h = self.size[1] / crop_h
        scale = min(scale_w, scale_h)
        h = int(crop_h * scale)
        w = int(crop_w * scale)
        if self.keep_ratio:
            # Paste the resized crop into a zero canvas of the target size.
            if len(im.shape) == 3:
                padimg = np.zeros((self.size[1], self.size[0], im.shape[2]),
                                  im.dtype)
            else:
                padimg = np.zeros((self.size[1], self.size[0]), im.dtype)
            padimg[:h, :w] = cv2.resize(
                im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
            img = padimg
        else:
            img = cv2.resize(
                im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
                tuple(self.size))
        # Shift/scale the boxes and drop the ones outside the crop.
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * scale).tolist()
            if not self.is_poly_outside_rect(poly, 0, 0, w, h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)
        data['img'] = img
        data['text_polys'] = np.float32(text_polys_crop)
        data['ignore_tags'] = ignore_tags_crop
        data['texts'] = texts_crop
        return data

    def is_poly_in_rect(self, poly, x, y, w, h):
        # True when every vertex of poly lies inside the rect (x, y, w, h).
        poly = np.array(poly)
        if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
            return False
        if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
            return False
        return True

    def is_poly_outside_rect(self, poly, x, y, w, h):
        # True when poly's bounding box lies entirely outside the rect.
        poly = np.array(poly)
        if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
            return True
        if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
            return True
        return False

    def split_regions(self, axis):
        # Split a sorted array of text-free coordinates into runs of
        # consecutive values; each run is a gap between text regions.
        regions = []
        min_axis = 0
        for i in range(1, axis.shape[0]):
            if axis[i] != axis[i - 1] + 1:
                region = axis[min_axis:i]
                min_axis = i
                regions.append(region)
        return regions

    def random_select(self, axis, max_size):
        # Pick two random free coordinates, ordered and clipped to
        # [0, max_size - 1].
        xx = np.random.choice(axis, size=2)
        xmin = np.min(xx)
        xmax = np.max(xx)
        xmin = np.clip(xmin, 0, max_size - 1)
        xmax = np.clip(xmax, 0, max_size - 1)
        return xmin, xmax

    def region_wise_random_select(self, regions, max_size):
        # Pick one coordinate from each of two randomly chosen gap regions.
        # NOTE(review): max_size is accepted but never used here.
        selected_index = list(np.random.choice(len(regions), 2))
        selected_values = []
        for index in selected_index:
            axis = regions[index]
            xx = int(np.random.choice(axis, size=1))
            selected_values.append(xx)
        xmin = min(selected_values)
        xmax = max(selected_values)
        return xmin, xmax

    def crop_area(self, im, text_polys):
        # Return (x, y, w, h) of a random crop that does not cut any polygon;
        # falls back to the whole image when no valid crop is found.
        h, w = im.shape[:2]
        # 1 marks rows/columns occupied by some text polygon's bounding box.
        h_array = np.zeros(h, dtype=np.int32)
        w_array = np.zeros(w, dtype=np.int32)
        for points in text_polys:
            points = np.round(points, decimals=0).astype(np.int32)
            minx = np.min(points[:, 0])
            maxx = np.max(points[:, 0])
            w_array[minx:maxx] = 1
            miny = np.min(points[:, 1])
            maxy = np.max(points[:, 1])
            h_array[miny:maxy] = 1
        # ensure the cropped area does not cross a text region
        h_axis = np.where(h_array == 0)[0]
        w_axis = np.where(w_array == 0)[0]

        if len(h_axis) == 0 or len(w_axis) == 0:
            # Text covers every row or column: no safe cut line exists.
            return 0, 0, w, h

        h_regions = self.split_regions(h_axis)
        w_regions = self.split_regions(w_axis)

        for i in range(self.max_tries):
            if len(w_regions) > 1:
                xmin, xmax = self.region_wise_random_select(w_regions, w)
            else:
                xmin, xmax = self.random_select(w_axis, w)
            if len(h_regions) > 1:
                ymin, ymax = self.region_wise_random_select(h_regions, h)
            else:
                ymin, ymax = self.random_select(h_axis, h)

            if xmax - xmin < self.min_crop_side_ratio * w or ymax - ymin < self.min_crop_side_ratio * h:
                # area too small
                continue
            num_poly_in_rect = 0
            for poly in text_polys:
                if not self.is_poly_outside_rect(poly, xmin, ymin,
                                                 xmax - xmin, ymax - ymin):
                    num_poly_in_rect += 1
                    break

            if num_poly_in_rect > 0:
                # At least one polygon survives: accept this crop.
                return xmin, ymin, xmax - xmin, ymax - ymin

        return 0, 0, w, h
class PSERandomCrop():
    """Random crop used for PSENet-style training data.

    Crops every map in data['imgs'] to the same (th, tw) window; when the
    threshold label map contains text, the window is (with probability 5/8)
    biased so the shrink label map keeps at least one text pixel.
    """

    def __init__(self, size):
        # size: target (height, width) of the crop
        self.size = size

    def __call__(self, data):
        imgs = data['imgs']

        h, w = imgs[0].shape[0:2]
        th, tw = self.size
        if w == tw and h == th:
            # Already at target size; nothing to crop.
            # Bug fix: return the data dict like every other exit path (the
            # original returned the bare list, breaking the transform
            # pipeline's dict-in/dict-out contract).
            return data

        # If the threshold label map contains text, crop around it with
        # probability 5/8.
        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
            # Top-left corner of the text region, pulled back by the crop size
            # so a crop started here can still contain the text.
            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
            tl[tl < 0] = 0
            # Bottom-right corner of the text region, pulled back likewise.
            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
            br[br < 0] = 0
            # Keep enough room to take a full (th, tw) crop from the corner.
            br[0] = min(br[0], h - th)
            br[1] = min(br[1], w - tw)
            # NOTE(review): random.randint assumes tl <= br element-wise; a
            # text region hugging the image border could violate that — confirm.

            for _ in range(50000):
                i = random.randint(tl[0], br[0])
                j = random.randint(tl[1], br[1])
                # Require the shrink label map to contain text in the window.
                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
                    continue
                else:
                    break
        else:
            i = random.randint(0, h - th)
            j = random.randint(0, w - tw)

        # Apply the same window to every map.
        for idx in range(len(imgs)):
            if len(imgs[idx].shape) == 3:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
            else:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
        data['imgs'] = imgs
        return data
benchmark/PaddleOCR_DBNet/environment.yml
0 → 100644
View file @
4824c25b
name
:
dbnet
channels
:
-
conda-forge
-
defaults
dependencies
:
-
anyconfig==0.9.10
-
future==0.18.2
-
imgaug==0.4.0
-
matplotlib==3.1.2
-
numpy==1.17.4
-
opencv
-
pyclipper
-
PyYAML==5.2
-
scikit-image==0.16.2
-
Shapely==1.6.4
-
tensorboard=2
-
tqdm==4.40.1
-
ipython
-
pip
-
pip
:
-
polygon3
benchmark/PaddleOCR_DBNet/eval.sh
0 → 100644
View file @
4824c25b
CUDA_VISIBLE_DEVICES
=
0 python3 tools/eval.py
--model_path
''
\ No newline at end of file
benchmark/PaddleOCR_DBNet/generate_lists.sh
0 → 100644
View file @
4824c25b
#Only use if your file names of the images and txts are identical
# Rebuild train.txt / test.txt pairing each image path with its ground-truth
# path, via temporary per-column list files that are removed afterwards.
# Fix: rm -f keeps the cleanup quiet on a fresh checkout where the list
# files do not exist yet (plain rm printed errors and could abort under -e).
rm -f ./datasets/train_img.txt
rm -f ./datasets/train_gt.txt
rm -f ./datasets/test_img.txt
rm -f ./datasets/test_gt.txt
rm -f ./datasets/train.txt
rm -f ./datasets/test.txt
ls ./datasets/train/img/*.jpg > ./datasets/train_img.txt
ls ./datasets/train/gt/*.txt > ./datasets/train_gt.txt
ls ./datasets/test/img/*.jpg > ./datasets/test_img.txt
ls ./datasets/test/gt/*.txt > ./datasets/test_gt.txt
paste ./datasets/train_img.txt ./datasets/train_gt.txt > ./datasets/train.txt
paste ./datasets/test_img.txt ./datasets/test_gt.txt > ./datasets/test.txt
rm -f ./datasets/train_img.txt
rm -f ./datasets/train_gt.txt
rm -f ./datasets/test_img.txt
rm -f ./datasets/test_gt.txt
benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg
0 → 100644
View file @
4824c25b
190 KB
benchmark/PaddleOCR_DBNet/models/__init__.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:55
# @Author : zhoujun
import copy
from .model import Model
from .losses import build_loss

# Public API of this package.
__all__ = ['build_loss', 'build_model']
# Architecture class names accepted by build_model.
support_model = ['Model']
def build_model(config):
    """
    Build an architecture from a config dict.

    :param config: dict with a 'type' key naming the architecture class;
        the remaining keys are passed to the class constructor. The input
        dict is not modified (a deep copy is taken).
    :return: the instantiated model
    :raises AssertionError: if config['type'] is not a supported architecture
    """
    copy_config = copy.deepcopy(config)
    arch_type = copy_config.pop('type')
    assert arch_type in support_model, f'{arch_type} is not developed yet!, only {support_model} are support now'
    # Explicit registry instead of eval(): no dynamic code execution and the
    # mapping from name to class is visible in one place.
    arch_dict = {'Model': Model}
    arch_model = arch_dict[arch_type](copy_config)
    return arch_model
benchmark/PaddleOCR_DBNet/models/backbone/__init__.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:54
# @Author : zhoujun
from .resnet import *

# Public API of this package.
__all__ = ['build_backbone']
# Backbone factory names accepted by build_backbone (all defined in .resnet).
support_backbone = [
    'resnet18', 'deformable_resnet18', 'deformable_resnet50', 'resnet50',
    'resnet34', 'resnet101', 'resnet152'
]
def build_backbone(backbone_name, **kwargs):
    """
    Build a backbone network by factory-function name.

    :param backbone_name: one of `support_backbone`
    :param kwargs: forwarded to the backbone factory (e.g. pretrained,
        in_channels)
    :return: the instantiated backbone
    :raises AssertionError: if backbone_name is not supported
    """
    assert backbone_name in support_backbone, f'all support backbone is {support_backbone}'
    # Look the factory up in the module namespace instead of eval(): the
    # whitelist assert above plus a plain dict lookup avoids dynamic code
    # execution entirely.
    backbone = globals()[backbone_name](**kwargs)
    return backbone
benchmark/PaddleOCR_DBNet/models/backbone/resnet.py
0 → 100644
View file @
4824c25b
import math
import paddle
from paddle import nn

# Alias so the torch-style name used throughout this file resolves to the
# paddle implementation.
BatchNorm2d = nn.BatchNorm2D

__all__ = [
    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
    'deformable_resnet18', 'deformable_resnet50', 'resnet152'
]

# torchvision pretrained checkpoints; downloaded with torch.utils.model_zoo
# and converted to paddle parameters by load_torch_params below.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def constant_init(module, constant, bias=0):
    """Reset module.weight (and module.bias, when present) to constant-filled
    parameters of the same shape."""

    def _const_param(tensor, value):
        # Recreate the parameter with a constant initializer.
        return paddle.create_parameter(
            shape=tensor.shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value))

    module.weight = _const_param(module.weight, constant)
    if hasattr(module, 'bias'):
        module.bias = _const_param(module.bias, bias)
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    conv = nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False)
    return conv
class BasicBlock(nn.Layer):
    """Two-conv residual block (ResNet-18/34), optionally deformable.

    When `dcn` is given, the second 3x3 conv becomes a DeformConv2D fed by
    a learned offset branch.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
        super(BasicBlock, self).__init__()
        self.with_dcn = dcn is not None
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes, momentum=0.1)
        self.relu = nn.ReLU()
        self.with_modulated_dcn = False
        if not self.with_dcn:
            self.conv2 = nn.Conv2D(
                planes, planes, kernel_size=3, padding=1, bias_attr=False)
        else:
            # Bug fix: DeformConv2D lives in paddle.vision.ops — the original
            # imported from the non-existent paddle.version.ops (the
            # Bottleneck class in this file already uses the correct path).
            from paddle.vision.ops import DeformConv2D
            deformable_groups = dcn.get('deformable_groups', 1)
            # 18 = 2 offsets (x, y) per position of a 3x3 kernel.
            offset_channels = 18
            self.conv2_offset = nn.Conv2D(
                planes,
                deformable_groups * offset_channels,
                kernel_size=3,
                padding=1)
            self.conv2 = DeformConv2D(
                planes, planes, kernel_size=3, padding=1, bias_attr=False)
        self.bn2 = BatchNorm2d(planes, momentum=0.1)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """conv-bn-relu, conv(-offset)-bn, add skip connection, relu."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        if not self.with_dcn:
            out = self.conv2(out)
        else:
            offset = self.conv2_offset(out)
            out = self.conv2(out, offset)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class Bottleneck(nn.Layer):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block (ResNet-50/101/152),
    optionally with a deformable middle conv when `dcn` is given."""
    expansion = 4  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
        super(Bottleneck, self).__init__()
        self.with_dcn = dcn is not None
        self.conv1 = nn.Conv2D(inplanes, planes, kernel_size=1, bias_attr=False)
        self.bn1 = BatchNorm2d(planes, momentum=0.1)
        self.with_modulated_dcn = False
        if not self.with_dcn:
            self.conv2 = nn.Conv2D(
                planes,
                planes,
                kernel_size=3,
                stride=stride,
                padding=1,
                bias_attr=False)
        else:
            deformable_groups = dcn.get('deformable_groups', 1)
            from paddle.vision.ops import DeformConv2D
            # 18 = 2 offsets (x, y) per position of a 3x3 kernel.
            offset_channels = 18
            # Offset branch feeding the deformable convolution below.
            self.conv2_offset = nn.Conv2D(
                planes,
                deformable_groups * offset_channels,
                stride=stride,
                kernel_size=3,
                padding=1)
            self.conv2 = DeformConv2D(
                planes,
                planes,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias_attr=False)
        self.bn2 = BatchNorm2d(planes, momentum=0.1)
        self.conv3 = nn.Conv2D(
            planes, planes * 4, kernel_size=1, bias_attr=False)
        self.bn3 = BatchNorm2d(planes * 4, momentum=0.1)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride
        self.dcn = dcn
        self.with_dcn = dcn is not None

    def forward(self, x):
        # Bottleneck forward: 1x1 reduce, 3x3 (possibly deformable),
        # 1x1 expand, then add the skip connection.
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        if not self.with_dcn:
            out = self.conv2(out)
        else:
            offset = self.conv2_offset(out)
            out = self.conv2(out, offset)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class ResNet(nn.Layer):
    """ResNet backbone returning the four stage feature maps (C2..C5)."""

    def __init__(self, block, layers, in_channels=3, dcn=None):
        # block: BasicBlock or Bottleneck
        # layers: blocks per stage, e.g. [3, 4, 6, 3] for ResNet-50
        # dcn: deformable-conv config applied to stages 2-4 (None disables)
        self.dcn = dcn
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.out_channels = []  # filled by _make_layer, one entry per stage
        self.conv1 = nn.Conv2D(
            in_channels,
            64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias_attr=False)
        self.bn1 = BatchNorm2d(64, momentum=0.1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)

        if self.dcn is not None:
            # Zero-init every offset branch so deformable convs start out
            # behaving like plain convolutions.
            # NOTE(review): `modules()` is the torch API; paddle layers
            # expose `sublayers()` — this branch looks like it would raise
            # AttributeError for dcn models, confirm against the paddle
            # version in use.
            for m in self.modules():
                if isinstance(m, Bottleneck) or isinstance(m, BasicBlock):
                    if hasattr(m, 'conv2_offset'):
                        constant_init(m.conv2_offset, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
        # Build one stage: first block may downsample/expand channels, the
        # rest keep the shape. Records the stage's output width.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Projection shortcut to match shape for the residual add.
            downsample = nn.Sequential(
                nn.Conv2D(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias_attr=False),
                BatchNorm2d(planes * block.expansion, momentum=0.1),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, dcn=dcn))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dcn=dcn))
        self.out_channels.append(planes * block.expansion)
        return nn.Sequential(*layers)

    def forward(self, x):
        # Stem, then the four stages; return all stage outputs for FPN use.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)

        return x2, x3, x4, x5
def load_torch_params(paddle_model, torch_patams):
    """Copy a torch state_dict into a paddle model.

    Renames torch BN statistic keys to paddle's names, converts tensors to
    numpy, and transposes the weights of fully-connected ('classifier')
    layers to match paddle's layout. Keys missing on the paddle side are
    reported and skipped.
    """
    paddle_params = paddle_model.state_dict()

    # Key substrings whose weight matrices need transposing (FC layers).
    fc_names = ['classifier']
    for key, torch_value in torch_patams.items():
        if 'num_batches_tracked' in key:
            # torch-only BN bookkeeping; not copied.
            continue
        # Map torch naming to paddle naming and strip DataParallel prefixes.
        key = key.replace("running_var", "_variance").replace(
            "running_mean", "_mean").replace("module.", "")
        torch_value = torch_value.detach().cpu().numpy()
        if key in paddle_params:
            flag = [i in key for i in fc_names]
            if any(flag) and "weight" in key:  # ignore bias
                # Swap the first two axes of the FC weight.
                new_shape = [1, 0] + list(range(2, torch_value.ndim))
                print(
                    f"name: {key}, ori shape: {torch_value.shape}, new shape: {torch_value.transpose(new_shape).shape}"
                )
                torch_value = torch_value.transpose(new_shape)
            paddle_params[key] = torch_value
        else:
            print(f'{key} not in paddle')
    paddle_model.set_state_dict(paddle_params)
def load_models(model, model_name):
    """Download the torch-pretrained checkpoint for `model_name` and copy
    its parameters into the paddle `model`."""
    import torch.utils.model_zoo as model_zoo

    torch_state = model_zoo.load_url(model_urls[model_name])
    load_torch_params(model, torch_state)
def resnet18(pretrained=True, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    assert kwargs.get('in_channels',
                      3) == 3, 'in_channels must be 3 whem pretrained is True'
    print('load from imagenet')
    load_models(net, 'resnet18')
    return net
def deformable_resnet18(pretrained=True, **kwargs):
    """Constructs a ResNet-18 model with deformable conv.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(
        BasicBlock, [2, 2, 2, 2], dcn=dict(deformable_groups=1), **kwargs)
    if pretrained:
        assert kwargs.get(
            'in_channels',
            3) == 3, 'in_channels must be 3 whem pretrained is True'
        print('load from imagenet')
        # Bug fix: `model_zoo` was referenced here without being imported
        # (NameError at runtime). Route through load_models() like
        # resnet18/resnet50, which downloads the torch checkpoint and
        # converts it to paddle parameters.
        load_models(model, 'resnet18')
    return model
def resnet34(pretrained=True, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        assert kwargs.get(
            'in_channels',
            3) == 3, 'in_channels must be 3 whem pretrained is True'
        # Bug fix: `model_zoo` was not in scope here (NameError). Use the
        # load_models() helper like resnet18/resnet50, which downloads the
        # torch checkpoint and converts it to paddle parameters.
        load_models(model, 'resnet34')
    return model
def resnet50(pretrained=True, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    assert kwargs.get('in_channels',
                      3) == 3, 'in_channels must be 3 whem pretrained is True'
    load_models(net, 'resnet50')
    return net
def deformable_resnet50(pretrained=True, **kwargs):
    """Constructs a ResNet-50 model with deformable conv.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(
        Bottleneck, [3, 4, 6, 3], dcn=dict(deformable_groups=1), **kwargs)
    if pretrained:
        assert kwargs.get(
            'in_channels',
            3) == 3, 'in_channels must be 3 whem pretrained is True'
        # Bug fix: `model_zoo` was not in scope here (NameError). Use the
        # load_models() helper like resnet18/resnet50, which downloads the
        # torch checkpoint and converts it to paddle parameters.
        load_models(model, 'resnet50')
    return model
def resnet101(pretrained=True, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        assert kwargs.get(
            'in_channels',
            3) == 3, 'in_channels must be 3 whem pretrained is True'
        # Bug fix: `model_zoo` was not in scope here (NameError). Use the
        # load_models() helper like resnet18/resnet50.
        load_models(model, 'resnet101')
    return model
def resnet152(pretrained=True, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        assert kwargs.get(
            'in_channels',
            3) == 3, 'in_channels must be 3 whem pretrained is True'
        # Bug fix: `model_zoo` was not in scope here (NameError). Use the
        # load_models() helper like resnet18/resnet50.
        load_models(model, 'resnet152')
    return model
if __name__ == '__main__':
    # Smoke test: run a dummy batch through ResNet-50 and print the shape
    # of each stage output plus the recorded stage widths.
    dummy = paddle.zeros([2, 3, 640, 640])
    backbone = resnet50(pretrained=True)
    features = backbone(dummy)
    for feature in features:
        print(feature.shape)
    print(backbone.out_channels)
benchmark/PaddleOCR_DBNet/models/basic.py
0 → 100644
View file @
4824c25b
# -*- coding: utf-8 -*-
# @Time : 2019/12/6 11:19
# @Author : zhoujun
from
paddle
import
nn
class ConvBnRelu(nn.Layer):
    """Conv2D -> BatchNorm2D -> ReLU, the standard fused convolution block."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 padding_mode='zeros',
                 inplace=True):
        # `inplace` is accepted for torch-style API compatibility but is
        # never referenced in this class.
        super().__init__()
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias,
            padding_mode=padding_mode)
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Apply conv, batch norm, and ReLU in sequence.
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
Prev
1
…
6
7
8
9
10
11
12
13
14
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment