Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
83303bc7
Commit
83303bc7
authored
Oct 09, 2021
by
LDOUBLEV
Browse files
fix conflicts
parents
3af943f3
af0bac58
Changes
424
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1520 additions
and
56 deletions
+1520
-56
benchmark/run_det.sh
benchmark/run_det.sh
+28
-0
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
+199
-0
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
+174
-0
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
+176
-0
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
+132
-0
configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml
configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml
+0
-5
configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml
configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml
+0
-5
configs/det/det_mv3_db.yml
configs/det/det_mv3_db.yml
+1
-6
configs/det/det_mv3_east.yml
configs/det/det_mv3_east.yml
+0
-5
configs/det/det_mv3_pse.yml
configs/det/det_mv3_pse.yml
+135
-0
configs/det/det_r50_vd_db.yml
configs/det/det_r50_vd_db.yml
+2
-7
configs/det/det_r50_vd_east.yml
configs/det/det_r50_vd_east.yml
+1
-6
configs/det/det_r50_vd_pse.yml
configs/det/det_r50_vd_pse.yml
+134
-0
configs/det/det_r50_vd_sast_icdar15.yml
configs/det/det_r50_vd_sast_icdar15.yml
+0
-5
configs/det/det_r50_vd_sast_totaltext.yml
configs/det/det_r50_vd_sast_totaltext.yml
+0
-5
configs/det/det_res18_db_v2.0.yml
configs/det/det_res18_db_v2.0.yml
+131
-0
configs/e2e/e2e_r50_vd_pg.yml
configs/e2e/e2e_r50_vd_pg.yml
+10
-12
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
+111
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
+160
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
...igs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
+126
-0
No files found.
benchmark/run_det.sh
0 → 100644
View file @
83303bc7
# Script for reproducing benchmark performance in a stable way. By default it is
# run with py37 inside the standard docker image:
#   paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
# Working directory: ./PaddleOCR

# 1. Install the dependencies required by the model
#    (note it here if an optimization strategy has to be enabled).
python3.7 -m pip install -r requirements.txt

# 2. Fetch the dataset and the pretrained model required by the model.
#    -P (upper case) sets the download directory; the lower-case -p means
#    "page requisites" and would make wget treat the directory as a URL.
wget -c -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
wget -c -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams

# 3. Batch run (if batching is inconvenient, steps 1 and 2 must be placed in
#    each individual model's script).
model_mode_list=(det_res18_db_v2.0 det_r50_vd_east)
fp_item_list=(fp32)
bs_list=(8 16)

for model_mode in "${model_mode_list[@]}"; do
    for fp_item in "${fp_item_list[@]}"; do
        for bs_item in "${bs_list[@]}"; do
            # Single-GPU run. The banner prints the loop variable model_mode;
            # the previously used model_name is never set in this script.
            echo "index is speed, 1gpus, begin, ${model_mode}"
            run_mode=sp
            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh "${run_mode}" "${bs_item}" "${fp_item}" 10 "${model_mode}"     # (5min)
            sleep 60

            # 8-GPU multi-process run with the same batch size and precision.
            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
            run_mode=mp
            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh "${run_mode}" "${bs_item}" "${fp_item}" 10 "${model_mode}"
            sleep 60
        done
    done
done
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/ch_db_mv3/
save_epoch_step
:
1200
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step
:
[
3000
,
2000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ch_PP-OCRv2_det_distill_train/best_accuracy
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
freeze_params
:
true
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
ResNet
layers
:
18
Neck
:
name
:
DBFPN
out_channels
:
256
Head
:
name
:
DBHead
k
:
50
Student
:
freeze_params
:
false
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Student2
:
freeze_params
:
false
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDilaDBLoss
:
weight
:
1.0
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
-
[
"
Student2"
,
"
Teacher"
]
key
:
maps
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
-
DistillationDMLLoss
:
model_name_pairs
:
-
[
"
Student"
,
"
Student2"
]
maps_name
:
"
thrink_maps"
weight
:
1.0
# act: None
model_name_pairs
:
[
"
Student"
,
"
Student2"
]
key
:
maps
-
DistillationDBLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Student2"
]
# key: maps
# name: DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DistillationDBPostProcess
model_name
:
[
"
Student"
,
"
Student2"
,
"
Teacher"
]
# key: maps
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DistillationMetric
base_metric_name
:
DetMetric
main_indicator
:
hmean
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/ch_db_mv3/
save_epoch_step
:
1200
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step
:
[
3000
,
2000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Student
:
pretrained
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
freeze_params
:
false
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Teacher
:
pretrained
:
./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy
freeze_params
:
true
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
ResNet
layers
:
18
Neck
:
name
:
DBFPN
out_channels
:
256
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDilaDBLoss
:
weight
:
1.0
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
maps
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
-
DistillationDBLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
# key: maps
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess:
  name: DistillationDBPostProcess
  # This config declares only the Student and Teacher sub-models under
  # Architecture.Models; there is no Student2 here.
  # NOTE(review): presumably every name listed must match a declared sub-model
  # (the post-process looks predictions up by these names) - confirm.
  model_name: ["Student"]
  key: head_out
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
Metric
:
name
:
DistillationMetric
base_metric_name
:
DetMetric
main_indicator
:
hmean
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/ch_db_mv3/
save_epoch_step
:
1200
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step
:
[
3000
,
2000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Student
:
pretrained
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
freeze_params
:
false
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Student2
:
pretrained
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
freeze_params
:
false
return_all_feats
:
false
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDMLLoss
:
model_name_pairs
:
-
[
"
Student"
,
"
Student2"
]
maps_name
:
"
thrink_maps"
weight
:
1.0
act
:
"
softmax"
model_name_pairs
:
[
"
Student"
,
"
Student2"
]
key
:
maps
-
DistillationDBLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Student2"
]
# key: maps
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DistillationDBPostProcess
model_name
:
[
"
Student"
,
"
Student2"
]
key
:
head_out
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DistillationMetric
base_metric_name
:
DetMetric
main_indicator
:
hmean
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_db_mv3/
save_epoch_step
:
1200
# evaluation is run every 400 iterations
eval_batch_step
:
[
0
,
400
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/student.pdparams
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1200
save_epoch_step
:
1200
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
3000
,
2000
]
eval_batch_step
:
[
3000
,
2000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
checkpoints
:
...
...
configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1200
save_epoch_step
:
1200
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
3000
,
2000
]
eval_batch_step
:
[
3000
,
2000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet18_vd_pretrained
pretrained_model
:
./pretrain_models/ResNet18_vd_pretrained
checkpoints
:
checkpoints
:
...
...
configs/det/det_mv3_db.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1200
save_epoch_step
:
1200
# evaluation is run every 2000 iterations
# evaluation is run every 2000 iterations
eval_batch_step
:
[
0
,
2000
]
eval_batch_step
:
[
0
,
2000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
checkpoints
:
...
...
configs/det/det_mv3_east.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1000
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
eval_batch_step
:
[
4000
,
5000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
checkpoints
:
...
...
configs/det/det_mv3_pse.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
600
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/det_mv3_pse/
save_epoch_step
:
600
# evaluation is run every 63 iterations
eval_batch_step
:
[
0
,
63
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
#./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_pse/predicts_pse.txt
Architecture
:
model_type
:
det
algorithm
:
PSE
Transform
:
null
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
Neck
:
name
:
FPN
out_channels
:
96
Head
:
name
:
PSEHead
hidden_dim
:
96
out_channels
:
7
Loss
:
name
:
PSELoss
alpha
:
0.7
ohem_ratio
:
3
kernel_sample_mask
:
pred
reduction
:
none
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Step
learning_rate
:
0.001
step_size
:
200
gamma
:
0.1
regularizer
:
name
:
'
L2'
factor
:
0.0005
PostProcess
:
name
:
PSEPostProcess
thresh
:
0
box_thresh
:
0.85
min_area
:
16
box_type
:
box
# 'box' or 'poly'
scale
:
1
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
ColorJitter
:
brightness
:
0.12549019607843137
saturation
:
0.5
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
MakePseGt
:
kernel_num
:
7
min_shrink_ratio
:
0.4
size
:
640
-
RandomCropImgMask
:
size
:
[
640
,
640
]
main_key
:
gt_text
crop_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
16
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
736
limit_type
:
min
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
\ No newline at end of file
configs/det/det_r50_vd_db.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1200
save_epoch_step
:
1200
# evaluation is run every 2000 iterations
# evaluation is run every 2000 iterations
eval_batch_step
:
[
0
,
2000
]
eval_batch_step
:
[
0
,
2000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained
checkpoints
:
checkpoints
:
...
@@ -103,7 +98,7 @@ Train:
...
@@ -103,7 +98,7 @@ Train:
shuffle
:
True
shuffle
:
True
drop_last
:
False
drop_last
:
False
batch_size_per_card
:
16
batch_size_per_card
:
16
num_workers
:
8
num_workers
:
4
Eval
:
Eval
:
dataset
:
dataset
:
...
...
configs/det/det_r50_vd_east.yml
View file @
83303bc7
...
@@ -7,13 +7,8 @@ Global:
...
@@ -7,13 +7,8 @@ Global:
save_epoch_step
:
1000
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
eval_batch_step
:
[
4000
,
5000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_pretrained
/
pretrained_model
:
./pretrain_models/ResNet50_vd_pretrained
checkpoints
:
checkpoints
:
save_inference_dir
:
save_inference_dir
:
use_visualdl
:
False
use_visualdl
:
False
...
...
configs/det/det_r50_vd_pse.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
600
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/det_r50_vd_pse/
save_epoch_step
:
600
# evaluation is run every 125 iterations
eval_batch_step
:
[
0
,
125
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained
checkpoints
:
#./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_pse/predicts_pse.txt
Architecture
:
model_type
:
det
algorithm
:
PSE
Transform
:
Backbone
:
name
:
ResNet
layers
:
50
Neck
:
name
:
FPN
out_channels
:
256
Head
:
name
:
PSEHead
hidden_dim
:
256
out_channels
:
7
Loss
:
name
:
PSELoss
alpha
:
0.7
ohem_ratio
:
3
kernel_sample_mask
:
pred
reduction
:
none
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Step
learning_rate
:
0.0001
step_size
:
200
gamma
:
0.1
regularizer
:
name
:
'
L2'
factor
:
0.0005
PostProcess
:
name
:
PSEPostProcess
thresh
:
0
box_thresh
:
0.85
min_area
:
16
box_type
:
box
# 'box' or 'poly'
scale
:
1
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
ColorJitter
:
brightness
:
0.12549019607843137
saturation
:
0.5
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
MakePseGt
:
kernel_num
:
7
min_shrink_ratio
:
0.4
size
:
640
-
RandomCropImgMask
:
size
:
[
640
,
640
]
main_key
:
gt_text
crop_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
736
limit_type
:
min
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
\ No newline at end of file
configs/det/det_r50_vd_sast_icdar15.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1000
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
eval_batch_step
:
[
4000
,
5000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
checkpoints
:
checkpoints
:
...
...
configs/det/det_r50_vd_sast_totaltext.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
1000
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
eval_batch_step
:
[
4000
,
5000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
True
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
checkpoints
:
checkpoints
:
...
...
configs/det/det_res18_db_v2.0.yml
0 → 100644
View file @
83303bc7
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/ch_db_res18/
save_epoch_step
:
1200
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step
:
[
3000
,
2000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet18_vd_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
ResNet
layers
:
18
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
256
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/e2e/e2e_r50_vd_pg.yml
View file @
83303bc7
...
@@ -7,11 +7,6 @@ Global:
...
@@ -7,11 +7,6 @@ Global:
save_epoch_step
:
10
save_epoch_step
:
10
# evaluation is run every 1000 iterations after the 0th iteration
# evaluation is run every 1000 iterations after the 0th iteration
eval_batch_step
:
[
0
,
1000
]
eval_batch_step
:
[
0
,
1000
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
False
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
pretrained_model
:
checkpoints
:
checkpoints
:
...
@@ -60,22 +55,25 @@ PostProcess:
...
@@ -60,22 +55,25 @@ PostProcess:
name
:
PGPostProcess
name
:
PGPostProcess
score_thresh
:
0.5
score_thresh
:
0.5
mode
:
fast
# fast or slow two ways
mode
:
fast
# fast or slow two ways
Metric
:
Metric
:
name
:
E2EMetric
name
:
E2EMetric
gt_mat_dir
:
# the dir of gt_mat
mode
:
A
# two ways for eval, A: label from txt, B: label from gt_mat
gt_mat_dir
:
./train_data/total_text/gt
# the dir of gt_mat
character_dict_path
:
ppocr/utils/ic15_dict.txt
character_dict_path
:
ppocr/utils/ic15_dict.txt
main_indicator
:
f_score_e2e
main_indicator
:
f_score_e2e
Train
:
Train
:
dataset
:
dataset
:
name
:
PGDataSet
name
:
PGDataSet
label_file_list
:
[
.././train_data/total_text/train/
]
data_dir
:
./train_data/total_text/train
label_file_list
:
[
./train_data/total_text/train/train.txt
]
ratio_list
:
[
1.0
]
ratio_list
:
[
1.0
]
data_format
:
icdar
#two data format: icdar/textnet
transforms
:
transforms
:
-
DecodeImage
:
# load image
-
DecodeImage
:
# load image
img_mode
:
BGR
img_mode
:
BGR
channel_first
:
False
channel_first
:
False
-
E2ELabelEncodeTrain
:
-
PGProcessTrain
:
-
PGProcessTrain
:
batch_size
:
14
# same as loader: batch_size_per_card
batch_size
:
14
# same as loader: batch_size_per_card
min_crop_size
:
24
min_crop_size
:
24
...
@@ -92,13 +90,13 @@ Train:
...
@@ -92,13 +90,13 @@ Train:
Eval
:
Eval
:
dataset
:
dataset
:
name
:
PGDataSet
name
:
PGDataSet
data_dir
:
./train_data/
data_dir
:
./train_data/
total_text/test
label_file_list
:
[
./train_data/total_text/test/
]
label_file_list
:
[
./train_data/total_text/test/
test.txt
]
transforms
:
transforms
:
-
DecodeImage
:
# load image
-
DecodeImage
:
# load image
img_mode
:
RGB
img_mode
:
RGB
channel_first
:
False
channel_first
:
False
-
E2ELabelEncode
:
-
E2ELabelEncode
Test
:
-
E2EResizeForTest
:
-
E2EResizeForTest
:
max_side_len
:
768
max_side_len
:
768
-
NormalizeImage
:
-
NormalizeImage
:
...
@@ -108,7 +106,7 @@ Eval:
...
@@ -108,7 +106,7 @@ Eval:
order
:
'
hwc'
order
:
'
hwc'
-
ToCHWImage
:
-
ToCHWImage
:
-
KeepKeys
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
str
s'
,
'
tags'
,
'
img_id'
]
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
text
s'
,
'
ignore_
tags'
,
'
img_id'
]
loader
:
loader
:
shuffle
:
False
shuffle
:
False
drop_last
:
False
drop_last
:
False
...
...
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
0 → 100644
View file @
83303bc7
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
,
800
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Loss
:
name
:
CTCLoss
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
0 → 100644
View file @
83303bc7
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_pp-OCRv2_distillation
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_pp-OCRv2_distillation.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
,
800
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
&model_type
"
rec"
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
-
DistillationDMLLoss
:
weight
:
1.0
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
head_out
-
DistillationDistanceLoss
:
weight
:
1.0
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_sections
:
1
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
0 → 100644
View file @
83303bc7
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2_enhanced_ctc_loss
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2_enhanced_ctc_loss.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
,
800
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
return_feats
:
true
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
CTCLoss
:
use_focal_loss
:
false
weight
:
1.0
-
CenterLoss
:
weight
:
0.05
num_classes
:
6625
feat_dim
:
96
init_center
:
false
center_file_path
:
"
./train_center.pkl"
# you can also try to add ace loss on your own dataset
# - ACELoss:
# weight: 0.1
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
-
label_ace
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
Prev
1
2
3
4
5
6
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment