Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ModelZoo
CRNN_Paddle
Commits
ed43fc11
Commit
ed43fc11
authored
May 16, 2025
by
wanglch
Browse files
Initial commit
parents
Pipeline
#2703
canceled with stages
Changes
378
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2976 additions
and
0 deletions
+2976
-0
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_distillation.yml
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_distillation.yml
+231
-0
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_fp32_ultra.yml
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_fp32_ultra.yml
+138
-0
configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml
configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml
+138
-0
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_ampO2_ultra.yml
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_ampO2_ultra.yml
+139
-0
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_doc.yml
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_doc.yml
+138
-0
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_fp32_ultra.yml
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_fp32_ultra.yml
+137
-0
configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_svtr_large.yml
configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_svtr_large.yml
+144
-0
configs/rec/PP-OCRv4/en_PP-OCRv4_mobile_rec.yml
configs/rec/PP-OCRv4/en_PP-OCRv4_mobile_rec.yml
+151
-0
configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml
configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml
+140
-0
configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml
configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml
+136
-0
configs/rec/SVTRv2/ch_RepSVTR_rec.yml
configs/rec/SVTRv2/ch_RepSVTR_rec.yml
+136
-0
configs/rec/SVTRv2/ch_RepSVTR_rec_gtc.yml
configs/rec/SVTRv2/ch_RepSVTR_rec_gtc.yml
+134
-0
configs/rec/SVTRv2/ch_SVTRv2_rec.yml
configs/rec/SVTRv2/ch_SVTRv2_rec.yml
+144
-0
configs/rec/SVTRv2/ch_SVTRv2_rec_distillation.yml
configs/rec/SVTRv2/ch_SVTRv2_rec_distillation.yml
+208
-0
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc.yml
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc.yml
+145
-0
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc_distill.yml
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc_distill.yml
+208
-0
configs/rec/UniMERNet.yaml
configs/rec/UniMERNet.yaml
+115
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
+110
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
+160
-0
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
...igs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
+124
-0
No files found.
Too many changes to show.
To preserve performance only
378 of 378+
files are displayed.
Plain diff
Email patch
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_distillation.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/
save_epoch_step
:
40
eval_batch_step
:
-
0
-
2000
cal_metric_during_train
:
true
pretrained_model
:
null
checkpoints
:
./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/latest
save_inference_dir
:
null
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
2
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
true
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR
Transform
:
null
Backbone
:
name
:
SVTRNet
img_size
:
-
48
-
320
out_char_num
:
40
out_channels
:
192
patch_merging
:
Conv
embed_dim
:
-
64
-
128
-
256
depth
:
-
3
-
6
-
3
num_heads
:
-
2
-
4
-
8
mixer
:
-
Conv
-
Conv
-
Conv
-
Conv
-
Conv
-
Conv
-
Global
-
Global
-
Global
-
Global
-
Global
-
Global
local_mixer
:
-
-
5
-
5
-
-
5
-
5
-
-
5
-
5
last_stage
:
false
prenorm
:
true
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR
Transform
:
null
Backbone
:
name
:
PPLCNetV3
scale
:
0.95
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDKDLoss
:
weight
:
0.1
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
multi_head
:
true
alpha
:
1.0
beta
:
2.0
dis_head
:
gtc
name
:
dkd
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillationNRTRLoss
:
weight
:
1.0
smoothing
:
false
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillCTCLogits
:
weight
:
1.0
reduction
:
mean
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
-
Student
key
:
head_out
multi_head
:
true
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
Student
ignore_space
:
false
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
ratio_list
:
-
1.0
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
use_shared_memory
:
true
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
profiler_options
:
null
configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec_fp32_ultra.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
PPLCNetV3
scale
:
0.95
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
16
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
16
configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
PP-OCRv4_server_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4_hgnet
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNet_small
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
128
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_ampO2_ultra.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4_hgnet
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
use_amp
:
True
amp_level
:
O2
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNet_small
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
256
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
16
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
16
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_doc.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
PP-OCRv4_server_rec_doc
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4_hgnet
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/ppocrv4_doc_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNet_small
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
128
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv4/PP-OCRv4_server_rec_fp32_ultra.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4_hgnet
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNet_small
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
256
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
16
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
16
configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_svtr_large.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec/svtr_large/
save_epoch_step
:
10
# evaluation is run every 2000 iterations after the 0th iteration
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
40
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_svtr_large.txt
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.99
epsilon
:
1.0e-08
weight_decay
:
0.05
no_weight_decay_name
:
norm pos_embed char_node_embed pos_node_embed char_pos_embed vis_pos_embed
one_dim_param_no_weight_decay
:
true
lr
:
name
:
Cosine
learning_rate
:
0.00025
# 8gpus 64bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
SVTRNet
img_size
:
-
48
-
320
out_char_num
:
40
out_channels
:
512
patch_merging
:
Conv
embed_dim
:
[
192
,
256
,
512
]
depth
:
[
6
,
6
,
9
]
num_heads
:
[
6
,
8
,
16
]
mixer
:
[
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
]
local_mixer
:
[[
5
,
5
],
[
5
,
5
],
[
5
,
5
]]
last_stage
:
False
prenorm
:
True
Head
:
name
:
MultiHead
use_pool
:
true
use_pos
:
true
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
true
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
64
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
SVTRRecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv4/en_PP-OCRv4_mobile_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
en_PP-OCRv4_mobile_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
50
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v4
save_epoch_step
:
10
eval_batch_step
:
-
0
-
2000
cal_metric_during_train
:
true
pretrained_model
:
null
checkpoints
:
null
save_inference_dir
:
null
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/en_dict.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.0005
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
PPLCNetV3
scale
:
0.95
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
-
1
-
3
use_guide
:
true
Head
:
fc_decay
:
1.0e-05
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
25
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
null
-
NRTRLoss
:
null
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
false
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
-
48
-
320
-
3
max_text_length
:
25
-
RecAug
:
null
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
-
-
320
-
32
-
-
320
-
48
-
-
320
-
64
first_bs
:
96
fix_bs
:
false
divided_factor
:
-
8
-
16
is_training
:
true
loader
:
shuffle
:
true
batch_size_per_card
:
96
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
-
3
-
48
-
320
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
profiler_options
:
null
configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
PP-OCRv5_mobile_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
75
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/PP-OCRv5_mobile_rec
save_epoch_step
:
10
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
./ppocr/utils/dict/ppocrv5_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv5.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.0005
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
PPLCNetV3
scale
:
0.95
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
128
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
PP-OCRv5_server_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
75
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/PP-OCRv5_server_rec
save_epoch_step
:
1
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
calc_epoch_interval
:
1
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
./ppocr/utils/dict/ppocrv5_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv5.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.0005
warmup_epoch
:
1
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNetV2_B4
text_rec
:
True
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
128
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
16
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_RepSVTR_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
ch_RepSVTR_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_RepSVTR_rec
save_epoch_step
:
10
eval_batch_step
:
[
0
,
1000
]
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_repsvtr.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
epsilon
:
1.e-8
weight_decay
:
0.025
no_weight_decay_name
:
norm
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.001
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
RepSVTR
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
num_decoder_layers
:
2
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_RepSVTR_rec_gtc.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_RepSVTR_rec_gtc
save_epoch_step
:
10
eval_batch_step
:
[
0
,
1000
]
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_repsvtr.txt
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
epsilon
:
1.e-8
weight_decay
:
0.025
no_weight_decay_name
:
norm
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.001
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
RepSVTR
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
num_decoder_layers
:
2
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_SVTRv2_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
ch_SVTRv2_rec
# To use static model for inference.
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_SVTRv2_rec
save_epoch_step
:
10
eval_batch_step
:
[
0
,
1000
]
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_svtrv2.txt
d2s_train_image_shape
:
[
3
,
48
,
320
]
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
epsilon
:
1.e-8
weight_decay
:
0.05
no_weight_decay_name
:
norm
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.001
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
SVTRv2
use_pos_embed
:
False
dims
:
[
128
,
256
,
384
]
depths
:
[
6
,
6
,
6
]
num_heads
:
[
4
,
8
,
12
]
mixer
:
[[
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
],[
'
Conv'
,
'
Conv'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
],[
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
]]
local_k
:
[[
5
,
5
],
[
5
,
5
],
[
-1
,
-1
]]
sub_k
:
[[
2
,
1
],
[
2
,
1
],
[
-1
,
-1
]]
last_stage
:
False
use_pool
:
True
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
num_decoder_layers
:
2
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_SVTRv2_rec_distillation.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
100
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_SVTRv2_rec_distill_lr00002/
save_epoch_step
:
5
eval_batch_step
:
-
0
-
1000
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_svtrv2_ch_distill.txt
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.99
epsilon
:
1.e-8
weight_decay
:
0.05
no_weight_decay_name
:
norm pos_embed patch_embed downsample
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.0002
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
./output/ch_SVTRv2_rec/best_accuracy
freeze_params
:
true
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
SVTRv2
use_pos_embed
:
False
dims
:
[
128
,
256
,
384
]
depths
:
[
6
,
6
,
6
]
num_heads
:
[
4
,
8
,
12
]
mixer
:
[[
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
],[
'
Conv'
,
'
Conv'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
],[
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
]]
local_k
:
[[
5
,
5
],
[
5
,
5
],
[
-1
,
-1
]]
sub_k
:
[[
2
,
1
],
[
2
,
1
],
[
-1
,
-1
]]
last_stage
:
False
use_pool
:
True
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
num_decoder_layers
:
2
max_text_length
:
*max_text_length
Student
:
pretrained
:
./output/ch_RepSVTR_rec/best_accuracy
freeze_params
:
false
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
RepSVTR
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
num_decoder_layers
:
2
max_text_length
:
*max_text_length
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDKDLoss
:
weight
:
0.1
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
multi_head
:
true
alpha
:
1.0
beta
:
2.0
dis_head
:
gtc
name
:
dkd
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillationNRTRLoss
:
weight
:
1.0
smoothing
:
false
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillCTCLogits
:
weight
:
1.0
reduction
:
mean
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
-
Student
key
:
head_out
multi_head
:
true
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
Student
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
200
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_SVTRv2_rec_gtc
save_epoch_step
:
10
eval_batch_step
:
[
0
,
1000
]
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_svtrv2.txt
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
epsilon
:
1.e-8
weight_decay
:
0.05
no_weight_decay_name
:
norm
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.001
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
SVTRv2
use_pos_embed
:
False
dims
:
[
128
,
256
,
384
]
depths
:
[
6
,
6
,
6
]
num_heads
:
[
4
,
8
,
12
]
mixer
:
[[
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
],[
'
Conv'
,
'
Conv'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
],[
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
]]
local_k
:
[[
5
,
5
],
[
5
,
5
],
[
-1
,
-1
]]
sub_k
:
[[
2
,
1
],
[
2
,
1
],
[
-1
,
-1
]]
last_stage
:
False
use_pool
:
True
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
*max_text_length
num_decoder_layers
:
2
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
NRTRLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/SVTRv2/ch_SVTRv2_rec_gtc_distill.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
100
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/ch_SVTRv2_rec_gtc_distill_lr00002/
save_epoch_step
:
5
eval_batch_step
:
-
0
-
1000
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_svtrv2_gtc_distill.txt
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.99
epsilon
:
1.e-8
weight_decay
:
0.05
no_weight_decay_name
:
norm pos_embed patch_embed downsample
one_dim_param_no_weight_decay
:
True
lr
:
name
:
Cosine
learning_rate
:
0.0002
# 8gpus 192bs
warmup_epoch
:
5
Architecture
:
model_type
:
rec
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
./output/ch_SVTRv2_rec_gtc/best_accuracy
freeze_params
:
true
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
SVTRv2
use_pos_embed
:
False
dims
:
[
128
,
256
,
384
]
depths
:
[
6
,
6
,
6
]
num_heads
:
[
4
,
8
,
12
]
mixer
:
[[
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
,
'
Conv'
],[
'
Conv'
,
'
Conv'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
],[
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
,
'
Global'
]]
local_k
:
[[
5
,
5
],
[
5
,
5
],
[
-1
,
-1
]]
sub_k
:
[[
2
,
1
],
[
2
,
1
],
[
-1
,
-1
]]
last_stage
:
False
use_pool
:
True
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
num_decoder_layers
:
2
max_text_length
:
*max_text_length
Student
:
pretrained
:
./output/ch_RepSVTR_rec_gtc/best_accuracy
freeze_params
:
false
return_all_feats
:
true
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
null
Backbone
:
name
:
RepSVTR
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
256
depth
:
2
hidden_dims
:
256
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
num_decoder_layers
:
2
max_text_length
:
*max_text_length
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDKDLoss
:
weight
:
0.1
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
multi_head
:
true
alpha
:
1.0
beta
:
2.0
dis_head
:
gtc
name
:
dkd
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillationNRTRLoss
:
weight
:
1.0
smoothing
:
false
model_name_list
:
-
Student
key
:
head_out
multi_head
:
true
-
DistillCTCLogits
:
weight
:
1.0
reduction
:
mean
model_name_pairs
:
-
-
Student
-
Teacher
key
:
head_out
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
-
Student
key
:
head_out
multi_head
:
true
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
Student
Train
:
dataset
:
name
:
MultiScaleDataSet
ds_width
:
false
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
sampler
:
name
:
MultiScaleSampler
scales
:
[[
320
,
32
],
[
320
,
48
],
[
320
,
64
]]
first_bs
:
&bs
192
fix_bs
:
false
divided_factor
:
[
8
,
16
]
# w, h
is_training
:
True
loader
:
shuffle
:
true
batch_size_per_card
:
*bs
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
gtc_encode
:
NRTRLabelEncode
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_gtc
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/UniMERNet.yaml
0 → 100644
View file @
ed43fc11
Global
:
model_name
:
UniMERNet
# To use static model for inference.
use_gpu
:
True
epoch_num
:
40
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/rec/unimernet/
save_epoch_step
:
5
# evaluation is run every 37880 iterations after the 0th iteration
eval_batch_step
:
[
0
,
37880
]
cal_metric_during_train
:
True
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/datasets/pme_demo/0000013.png
infer_mode
:
False
use_space_char
:
False
rec_char_dict_path
:
&rec_char_dict_path
ppocr/utils/dict/unimernet_tokenizer
input_size
:
&input_size
[
192
,
672
]
max_seq_len
:
&max_seq_len
1024
save_res_path
:
./output/rec/predicts_unimernet.txt
allow_resize_largeImg
:
False
d2s_train_image_shape
:
[
1
,
192
,
672
]
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
weight_decay
:
0.05
lr
:
name
:
LinearWarmupCosine
learning_rate
:
1e-4
start_lr
:
1e-5
min_lr
:
1e-8
warmup_steps
:
5000
Architecture
:
model_type
:
rec
algorithm
:
UniMERNet
in_channels
:
3
Transform
:
Backbone
:
name
:
DonutSwinModel
hidden_size
:
1024
num_layers
:
4
num_heads
:
[
4
,
8
,
16
,
32
]
add_pooling_layer
:
True
use_mask_token
:
False
Head
:
name
:
UniMERNetHead
max_new_tokens
:
1536
decoder_start_token_id
:
0
temperature
:
0.2
do_sample
:
False
top_p
:
0.95
encoder_hidden_size
:
1024
is_export
:
False
length_aware
:
True
Loss
:
name
:
UniMERNetLoss
PostProcess
:
name
:
UniMERNetDecode
rec_char_dict_path
:
*rec_char_dict_path
Metric
:
name
:
LaTeXOCRMetric
main_indicator
:
exp_rate
cal_bleu_score
:
True
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/UniMERNet/
label_file_list
:
[
"
./train_data/UniMERNet/train_unimernet_1M.txt"
]
transforms
:
-
UniMERNetImgDecode
:
input_size
:
*input_size
-
UniMERNetTrainTransform
:
-
UniMERNetImageFormat
:
-
UniMERNetLabelEncode
:
rec_char_dict_path
:
*rec_char_dict_path
max_seq_len
:
*max_seq_len
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
attention_mask'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
7
num_workers
:
0
collate_fn
:
UniMERNetCollator
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/UniMERNet/UniMER-Test/cpe
label_file_list
:
[
"
./train_data/UniMERNet/test_unimernet_cpe.txt"
]
transforms
:
-
UniMERNetImgDecode
:
input_size
:
*input_size
-
UniMERNetTestTransform
:
-
UniMERNetImageFormat
:
-
UniMERNetLabelEncode
:
max_seq_len
:
*max_seq_len
rec_char_dict_path
:
*rec_char_dict_path
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
attention_mask'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
30
num_workers
:
0
collate_fn
:
UniMERNetCollator
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Loss
:
name
:
CTCLoss
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_pp-OCRv2_distillation
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_pp-OCRv2_distillation.txt
amp_custom_black_list
:
[
'
matmul'
,
'
matmul_v2'
,
'
elementwise_add'
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
&model_type
"
rec"
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
-
DistillationDMLLoss
:
weight
:
1.0
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
head_out
-
DistillationDistanceLoss
:
weight
:
1.0
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_sections
:
1
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
0 → 100644
View file @
ed43fc11
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_mobile_pp-OCRv2_enhanced_ctc_loss
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_mobile_pp-OCRv2_enhanced_ctc_loss.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
]
values
:
[
0.001
,
0.0001
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
2.0e-05
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
64
Head
:
name
:
CTCHead
mid_channels
:
96
fc_decay
:
0.00002
return_feats
:
true
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
CTCLoss
:
use_focal_loss
:
false
weight
:
1.0
-
CenterLoss
:
weight
:
0.05
num_classes
:
6625
feat_dim
:
96
center_file_path
:
# You can also try adding ACE loss when training on your own dataset:
# - ACELoss:
# weight: 0.1
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecAug
:
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
-
label_ace
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
CTCLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label
-
length
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
8
Prev
1
…
5
6
7
8
9
10
11
12
13
…
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment