Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
006d84bf
Unverified
Commit
006d84bf
authored
Oct 21, 2021
by
崔浩
Committed by
GitHub
Oct 21, 2021
Browse files
Merge branch 'PaddlePaddle:dygraph' into dygraph
parents
302ca30c
8beeb84c
Changes
241
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
776 additions
and
18 deletions
+776
-18
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
+132
-0
configs/det/det_mv3_db.yml
configs/det/det_mv3_db.yml
+1
-1
configs/det/det_mv3_pse.yml
configs/det/det_mv3_pse.yml
+135
-0
configs/det/det_r50_vd_db.yml
configs/det/det_r50_vd_db.yml
+2
-2
configs/det/det_r50_vd_east.yml
configs/det/det_r50_vd_east.yml
+1
-1
configs/det/det_r50_vd_pse.yml
configs/det/det_r50_vd_pse.yml
+134
-0
configs/det/det_res18_db_v2.0.yml
configs/det/det_res18_db_v2.0.yml
+131
-0
configs/e2e/e2e_r50_vd_pg.yml
configs/e2e/e2e_r50_vd_pg.yml
+2
-2
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
+110
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
+3
-3
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
...igs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
+125
-0
configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml
configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml
+0
-1
configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml
configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml
+0
-1
configs/rec/multi_language/rec_arabic_lite_train.yml
configs/rec/multi_language/rec_arabic_lite_train.yml
+0
-1
configs/rec/multi_language/rec_cyrillic_lite_train.yml
configs/rec/multi_language/rec_cyrillic_lite_train.yml
+0
-1
configs/rec/multi_language/rec_devanagari_lite_train.yml
configs/rec/multi_language/rec_devanagari_lite_train.yml
+0
-1
configs/rec/multi_language/rec_en_number_lite_train.yml
configs/rec/multi_language/rec_en_number_lite_train.yml
+0
-1
configs/rec/multi_language/rec_french_lite_train.yml
configs/rec/multi_language/rec_french_lite_train.yml
+0
-1
configs/rec/multi_language/rec_german_lite_train.yml
configs/rec/multi_language/rec_german_lite_train.yml
+0
-1
configs/rec/multi_language/rec_japan_lite_train.yml
configs/rec/multi_language/rec_japan_lite_train.yml
+0
-1
No files found.
configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
0 → 100644
View file @
006d84bf
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_db_mv3/
save_epoch_step
:
1200
# evaluation is run every 400 iterations, starting from iteration 0
eval_batch_step
:
[
0
,
400
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/student.pdparams
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/det/det_mv3_db.yml
View file @
006d84bf
...
...
@@ -128,4 +128,4 @@ Eval:
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
use_shared_memory
:
False
\ No newline at end of file
use_shared_memory
:
False
configs/det/det_mv3_pse.yml
0 → 100644
View file @
006d84bf
Global
:
use_gpu
:
true
epoch_num
:
600
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/det_mv3_pse/
save_epoch_step
:
600
# evaluation is run every 63 iterations
eval_batch_step
:
[
0
,
63
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
#./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_pse/predicts_pse.txt
Architecture
:
model_type
:
det
algorithm
:
PSE
Transform
:
null
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
Neck
:
name
:
FPN
out_channels
:
96
Head
:
name
:
PSEHead
hidden_dim
:
96
out_channels
:
7
Loss
:
name
:
PSELoss
alpha
:
0.7
ohem_ratio
:
3
kernel_sample_mask
:
pred
reduction
:
none
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Step
learning_rate
:
0.001
step_size
:
200
gamma
:
0.1
regularizer
:
name
:
'
L2'
factor
:
0.0005
PostProcess
:
name
:
PSEPostProcess
thresh
:
0
box_thresh
:
0.85
min_area
:
16
box_type
:
box
# 'box' or 'poly'
scale
:
1
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
ColorJitter
:
brightness
:
0.12549019607843137
saturation
:
0.5
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
MakePseGt
:
kernel_num
:
7
min_shrink_ratio
:
0.4
size
:
640
-
RandomCropImgMask
:
size
:
[
640
,
640
]
main_key
:
gt_text
crop_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
16
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
736
limit_type
:
min
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
\ No newline at end of file
configs/det/det_r50_vd_db.yml
View file @
006d84bf
...
...
@@ -98,7 +98,7 @@ Train:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
16
num_workers
:
8
num_workers
:
4
Eval
:
dataset
:
...
...
@@ -125,4 +125,4 @@ Eval:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
\ No newline at end of file
num_workers
:
8
configs/det/det_r50_vd_east.yml
View file @
006d84bf
...
...
@@ -8,7 +8,7 @@ Global:
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_pretrained
/
pretrained_model
:
./pretrain_models/ResNet50_vd_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
...
...
configs/det/det_r50_vd_pse.yml
0 → 100644
View file @
006d84bf
Global
:
use_gpu
:
true
epoch_num
:
600
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/det_r50_vd_pse/
save_epoch_step
:
600
# evaluation is run every 125 iterations
eval_batch_step
:
[
0
,
125
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained
checkpoints
:
#./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_pse/predicts_pse.txt
Architecture
:
model_type
:
det
algorithm
:
PSE
Transform
:
Backbone
:
name
:
ResNet
layers
:
50
Neck
:
name
:
FPN
out_channels
:
256
Head
:
name
:
PSEHead
hidden_dim
:
256
out_channels
:
7
Loss
:
name
:
PSELoss
alpha
:
0.7
ohem_ratio
:
3
kernel_sample_mask
:
pred
reduction
:
none
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Step
learning_rate
:
0.0001
step_size
:
200
gamma
:
0.1
regularizer
:
name
:
'
L2'
factor
:
0.0005
PostProcess
:
name
:
PSEPostProcess
thresh
:
0
box_thresh
:
0.85
min_area
:
16
box_type
:
box
# 'box' or 'poly'
scale
:
1
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
ColorJitter
:
brightness
:
0.12549019607843137
saturation
:
0.5
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
MakePseGt
:
kernel_num
:
7
min_shrink_ratio
:
0.4
size
:
640
-
RandomCropImgMask
:
size
:
[
640
,
640
]
main_key
:
gt_text
crop_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
gt_text'
,
'
gt_kernels'
,
'
mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
736
limit_type
:
min
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
8
\ No newline at end of file
configs/det/det_res18_db_v2.0.yml
0 → 100644
View file @
006d84bf
Global
:
use_gpu
:
true
epoch_num
:
1200
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/ch_db_res18/
save_epoch_step
:
1200
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step
:
[
3000
,
2000
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet18_vd_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
ResNet
layers
:
18
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
256
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
{
'
type'
:
Affine
,
'
args'
:
{
'
rotate'
:
[
-10
,
10
]
}
}
-
{
'
type'
:
Resize
,
'
args'
:
{
'
size'
:
[
0.5
,
3
]
}
}
-
EastRandomCropData
:
size
:
[
960
,
960
]
max_tries
:
50
keep_ratio
:
true
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# image_shape: [736, 1280]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
configs/e2e/e2e_r50_vd_pg.yml
View file @
006d84bf
...
...
@@ -94,7 +94,7 @@ Eval:
label_file_list
:
[
./train_data/total_text/test/test.txt
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
img_mode
:
BGR
channel_first
:
False
-
E2ELabelEncodeTest
:
-
E2EResizeForTest
:
...
...
@@ -111,4 +111,4 @@ Eval:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
num_workers
:
2
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
0 → 100644
View file @
006d84bf
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
,
800
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Loss
:
name
:
CTCLoss
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_
ppocr_v2.1/rec_chinese_lite_train
_distillation
_v2.1
.yml
→
configs/rec/ch_
PP-OCRv2/ch_PP-OCRv2_rec
_distillation.yml
View file @
006d84bf
...
...
@@ -4,7 +4,7 @@ Global:
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_
chinese_lite
_distillation
_v2.1
save_model_dir
:
./output/rec_
pp-OCRv2
_distillation
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
...
...
@@ -14,12 +14,11 @@ Global:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_
chinese_lite
_distillation
_v2.1
.txt
save_res_path
:
./output/rec/predicts_
pp-OCRv2
_distillation.txt
Optimizer
:
...
...
@@ -88,6 +87,7 @@ Loss:
-
DistillationDMLLoss
:
weight
:
1.0
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
head_out
...
...
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
0 → 100644
View file @
006d84bf
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2_enhanced_ctc_loss
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2_enhanced_ctc_loss.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
,
800
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
return_feats
:
true
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
CTCLoss
:
use_focal_loss
:
false
weight
:
1.0
-
CenterLoss
:
weight
:
0.05
num_classes
:
6625
feat_dim
:
96
init_center
:
false
center_file_path
:
"
./train_center.pkl"
# you can also try to add ace loss on your own dataset
# - ACELoss:
# weight: 0.1
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
-
label_ace
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml
View file @
006d84bf
...
...
@@ -15,7 +15,6 @@ Global:
infer_img
:
doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
False
use_space_char
:
True
...
...
configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml
View file @
006d84bf
...
...
@@ -15,7 +15,6 @@ Global:
infer_img
:
doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
character_type
:
ch
max_text_length
:
25
infer_mode
:
False
use_space_char
:
True
...
...
configs/rec/multi_language/rec_arabic_lite_train.yml
View file @
006d84bf
...
...
@@ -15,7 +15,6 @@ Global:
use_visualdl
:
false
infer_img
:
null
character_dict_path
:
ppocr/utils/dict/arabic_dict.txt
character_type
:
arabic
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
...
...
configs/rec/multi_language/rec_cyrillic_lite_train.yml
View file @
006d84bf
...
...
@@ -15,7 +15,6 @@ Global:
use_visualdl
:
false
infer_img
:
null
character_dict_path
:
ppocr/utils/dict/cyrillic_dict.txt
character_type
:
cyrillic
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
...
...
configs/rec/multi_language/rec_devanagari_lite_train.yml
View file @
006d84bf
...
...
@@ -15,7 +15,6 @@ Global:
use_visualdl
:
false
infer_img
:
null
character_dict_path
:
ppocr/utils/dict/devanagari_dict.txt
character_type
:
devanagari
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
...
...
configs/rec/multi_language/rec_en_number_lite_train.yml
View file @
006d84bf
...
...
@@ -16,7 +16,6 @@ Global:
infer_img
:
# for data or label process
character_dict_path
:
ppocr/utils/en_dict.txt
character_type
:
EN
max_text_length
:
25
infer_mode
:
False
use_space_char
:
True
...
...
configs/rec/multi_language/rec_french_lite_train.yml
View file @
006d84bf
...
...
@@ -16,7 +16,6 @@ Global:
infer_img
:
# for data or label process
character_dict_path
:
ppocr/utils/dict/french_dict.txt
character_type
:
french
max_text_length
:
25
infer_mode
:
False
use_space_char
:
False
...
...
configs/rec/multi_language/rec_german_lite_train.yml
View file @
006d84bf
...
...
@@ -16,7 +16,6 @@ Global:
infer_img
:
# for data or label process
character_dict_path
:
ppocr/utils/dict/german_dict.txt
character_type
:
german
max_text_length
:
25
infer_mode
:
False
use_space_char
:
False
...
...
configs/rec/multi_language/rec_japan_lite_train.yml
View file @
006d84bf
...
...
@@ -16,7 +16,6 @@ Global:
infer_img
:
# for data or label process
character_dict_path
:
ppocr/utils/dict/japan_dict.txt
character_type
:
japan
max_text_length
:
25
infer_mode
:
False
use_space_char
:
False
...
...
Prev
1
2
3
4
5
6
7
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment