Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
4824c25b
Commit
4824c25b
authored
Jul 04, 2024
by
wangsen
Browse files
Initial commit
parents
Changes
396
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2616 additions
and
0 deletions
+2616
-0
configs/kie/layoutlm_series/re_layoutlmv2_xfund_zh.yml
configs/kie/layoutlm_series/re_layoutlmv2_xfund_zh.yml
+123
-0
configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml
configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml
+123
-0
configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml
configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml
+121
-0
configs/kie/layoutlm_series/ser_layoutlmv2_xfund_zh.yml
configs/kie/layoutlm_series/ser_layoutlmv2_xfund_zh.yml
+122
-0
configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml
configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml
+122
-0
configs/kie/sdmgr/kie_unet_sdmgr.yml
configs/kie/sdmgr/kie_unet_sdmgr.yml
+111
-0
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml
+130
-0
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
+177
-0
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml
+138
-0
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
+182
-0
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
+133
-0
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
+209
-0
configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
+133
-0
configs/rec/PP-OCRv3/multi_language/.gitkeep
configs/rec/PP-OCRv3/multi_language/.gitkeep
+0
-0
configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
+132
-0
configs/rec/PP-OCRv3/multi_language/chinese_cht_PP-OCRv3_rec.yml
.../rec/PP-OCRv3/multi_language/chinese_cht_PP-OCRv3_rec.yml
+132
-0
configs/rec/PP-OCRv3/multi_language/cyrillic_PP-OCRv3_rec.yml
...igs/rec/PP-OCRv3/multi_language/cyrillic_PP-OCRv3_rec.yml
+132
-0
configs/rec/PP-OCRv3/multi_language/devanagari_PP-OCRv3_rec.yml
...s/rec/PP-OCRv3/multi_language/devanagari_PP-OCRv3_rec.yml
+132
-0
configs/rec/PP-OCRv3/multi_language/japan_PP-OCRv3_rec.yml
configs/rec/PP-OCRv3/multi_language/japan_PP-OCRv3_rec.yml
+132
-0
configs/rec/PP-OCRv3/multi_language/ka_PP-OCRv3_rec.yml
configs/rec/PP-OCRv3/multi_language/ka_PP-OCRv3_rec.yml
+132
-0
No files found.
Too many changes to show.
To preserve performance only
396 of 396+
files are displayed.
Plain diff
Email patch
configs/kie/layoutlm_series/re_layoutlmv2_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/re_layoutlmv2_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path
:
./output/re_layoutlmv2_xfund_zh/res/
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutLMv2"
Transform
:
Backbone
:
name
:
LayoutLMv2ForRe
pretrained
:
True
checkpoints
:
Loss
:
name
:
LossFromOutput
key
:
loss
reduction
:
mean
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
clip_norm
:
10
lr
:
learning_rate
:
0.00005
warmup_epoch
:
10
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQAReTokenLayoutLMPostProcess
Metric
:
name
:
VQAReTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
collate_fn
:
ListCollator
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
collate_fn
:
ListCollator
configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
130
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/re_layoutxlm_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path
:
./output/re_layoutxlm_xfund_zh/res/
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForRe
pretrained
:
True
checkpoints
:
Loss
:
name
:
LossFromOutput
key
:
loss
reduction
:
mean
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
clip_norm
:
10
lr
:
learning_rate
:
0.00005
warmup_epoch
:
10
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQAReTokenLayoutLMPostProcess
Metric
:
name
:
VQAReTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
2
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/ser_layoutlm_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path
:
./output/re_layoutlm_xfund_zh/res
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutLM"
Transform
:
Backbone
:
name
:
LayoutLMForSer
pretrained
:
True
checkpoints
:
num_classes
:
&num_classes
7
Loss
:
name
:
VQASerTokenLayoutLMLoss
num_classes
:
*num_classes
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Linear
learning_rate
:
0.00005
epochs
:
*epoch_num
warmup_epoch
:
2
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQASerTokenLayoutLMPostProcess
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
Metric
:
name
:
VQASerTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
16
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
configs/kie/layoutlm_series/ser_layoutlmv2_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/ser_layoutlmv2_xfund_zh/
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path
:
./output/ser_layoutlmv2_xfund_zh/res/
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutLMv2"
Transform
:
Backbone
:
name
:
LayoutLMv2ForSer
pretrained
:
True
checkpoints
:
num_classes
:
&num_classes
7
Loss
:
name
:
VQASerTokenLayoutLMLoss
num_classes
:
*num_classes
key
:
"
backbone_out"
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Linear
learning_rate
:
0.00005
epochs
:
*epoch_num
warmup_epoch
:
2
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQASerTokenLayoutLMPostProcess
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
Metric
:
name
:
VQASerTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/ser_layoutxlm_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path
:
./output/ser_layoutxlm_xfund_zh/res
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForSer
pretrained
:
True
checkpoints
:
num_classes
:
&num_classes
7
Loss
:
name
:
VQASerTokenLayoutLMLoss
num_classes
:
*num_classes
key
:
"
backbone_out"
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Linear
learning_rate
:
0.00005
epochs
:
*epoch_num
warmup_epoch
:
2
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQASerTokenLayoutLMPostProcess
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
Metric
:
name
:
VQASerTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
configs/kie/sdmgr/kie_unet_sdmgr.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
60
log_smooth_window
:
20
print_batch_step
:
50
save_model_dir
:
./output/kie_5/
save_epoch_step
:
50
# evaluation is run every 80 iterations after the 0th iteration
eval_batch_step
:
[
0
,
80
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
False
cal_metric_during_train
:
False
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
class_path
:
&class_path
./train_data/wildreceipt/class_list.txt
infer_img
:
./train_data/wildreceipt/1.txt
save_res_path
:
./output/sdmgr_kie/predicts_kie.txt
img_scale
:
[
1024
,
512
]
Architecture
:
model_type
:
kie
algorithm
:
SDMGR
Transform
:
Backbone
:
name
:
Kie_backbone
Head
:
name
:
SDMGRHead
Loss
:
name
:
SDMGRLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
learning_rate
:
0.001
decay_epochs
:
[
60
,
80
,
100
]
values
:
[
0.001
,
0.0001
,
0.00001
]
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0.00005
PostProcess
:
name
:
None
Metric
:
name
:
KIEMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/wildreceipt/
label_file_list
:
[
'
./train_data/wildreceipt/wildreceipt_train.txt'
]
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
KieLabelEncode
:
# Class handling label
character_dict_path
:
./train_data/wildreceipt/dict.txt
class_path
:
*class_path
-
KieResize
:
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
relations'
,
'
texts'
,
'
points'
,
'
labels'
,
'
tag'
,
'
shape'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
4
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/wildreceipt
label_file_list
:
-
./train_data/wildreceipt/wildreceipt_test.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
KieLabelEncode
:
# Class handling label
character_dict_path
:
./train_data/wildreceipt/dict.txt
-
KieResize
:
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
relations'
,
'
texts'
,
'
points'
,
'
labels'
,
'
tag'
,
'
ori_image'
,
'
ori_boxes'
,
'
shape'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
4
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
130
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/re_vi_layoutxlm_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path
:
./output/re/xfund_zh/with_gt
kie_rec_model_dir
:
kie_det_model_dir
:
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForRe
pretrained
:
True
mode
:
vi
checkpoints
:
Loss
:
name
:
LossFromOutput
key
:
loss
reduction
:
mean
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
clip_norm
:
10
lr
:
learning_rate
:
0.00005
warmup_epoch
:
10
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQAReTokenLayoutLMPostProcess
Metric
:
name
:
VQAReTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
use_textline_bbox_info
:
&use_textline_bbox_info
True
order_method
:
&order_method
"
tb-yx"
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
2
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
*class_path
use_textline_bbox_info
:
*use_textline_bbox_info
order_method
:
*order_method
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
130
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/re_vi_layoutxlm_xfund_zh_udml
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path
:
./output/re/xfund_zh/with_gt
Architecture
:
model_type
:
&model_type
"
kie"
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForRe
pretrained
:
True
mode
:
vi
checkpoints
:
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
*algorithm
Transform
:
Backbone
:
name
:
LayoutXLMForRe
pretrained
:
True
mode
:
vi
checkpoints
:
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationLossFromOutput
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
loss
reduction
:
mean
-
DistillationVQADistanceLoss
:
weight
:
0.5
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
hidden_states
index
:
5
name
:
"
loss_5"
-
DistillationVQADistanceLoss
:
weight
:
0.5
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
hidden_states
index
:
8
name
:
"
loss_8"
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
clip_norm
:
10
lr
:
learning_rate
:
0.00005
warmup_epoch
:
10
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
DistillationRePostProcess
model_name
:
[
"
Student"
,
"
Teacher"
]
key
:
null
Metric
:
name
:
DistillationMetric
base_metric_name
:
VQAReTokenMetric
main_indicator
:
hmean
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
use_textline_bbox_info
:
&use_textline_bbox_info
True
# one of [None, "tb-yx"]
order_method
:
&order_method
"
tb-yx"
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
2
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
True
algorithm
:
*algorithm
class_path
:
*class_path
use_textline_bbox_info
:
*use_textline_bbox_info
order_method
:
*order_method
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQAReTokenRelation
:
-
VQAReTokenChunk
:
max_seq_len
:
*max_seq_len
-
TensorizeEntitiesRelations
:
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
entities'
,
'
relations'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/ser_vi_layoutxlm_xfund_zh
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_42.jpg
d2s_train_image_shape
:
[
3
,
224
,
224
]
# if you want to predict using the groundtruth ocr info,
# you can use the following config
# infer_img: train_data/XFUND/zh_val/val.json
# infer_mode: False
save_res_path
:
./output/ser/xfund_zh/res
kie_rec_model_dir
:
kie_det_model_dir
:
amp_custom_white_list
:
[
'
scale'
,
'
concat'
,
'
elementwise_add'
]
Architecture
:
model_type
:
kie
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForSer
pretrained
:
True
checkpoints
:
# one of base or vi
mode
:
vi
num_classes
:
&num_classes
7
Loss
:
name
:
VQASerTokenLayoutLMLoss
num_classes
:
*num_classes
key
:
"
backbone_out"
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Linear
learning_rate
:
0.00005
epochs
:
*epoch_num
warmup_epoch
:
2
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
VQASerTokenLayoutLMPostProcess
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
Metric
:
name
:
VQASerTokenMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
use_textline_bbox_info
:
&use_textline_bbox_info
True
# one of [None, "tb-yx"]
order_method
:
&order_method
"
tb-yx"
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
use_textline_bbox_info
:
*use_textline_bbox_info
order_method
:
*order_method
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
0 → 100644
View file @
4824c25b
Global
:
use_gpu
:
True
epoch_num
:
&epoch_num
200
log_smooth_window
:
10
print_batch_step
:
10
save_model_dir
:
./output/ser_vi_layoutxlm_xfund_zh_udml
save_epoch_step
:
2000
# evaluation is run every 19 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
save_inference_dir
:
use_visualdl
:
False
seed
:
2022
infer_img
:
ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path
:
./output/ser_layoutxlm_xfund_zh/res
Architecture
:
model_type
:
&model_type
"
kie"
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
&algorithm
"
LayoutXLM"
Transform
:
Backbone
:
name
:
LayoutXLMForSer
pretrained
:
True
# one of base or vi
mode
:
vi
checkpoints
:
num_classes
:
&num_classes
7
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
*algorithm
Transform
:
Backbone
:
name
:
LayoutXLMForSer
pretrained
:
True
# one of base or vi
mode
:
vi
checkpoints
:
num_classes
:
*num_classes
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationVQASerTokenLayoutLMLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
num_classes
:
*num_classes
-
DistillationSERDMLLoss
:
weight
:
1.0
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
-
DistillationVQADistanceLoss
:
weight
:
0.5
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
hidden_states_5
name
:
"
loss_5"
-
DistillationVQADistanceLoss
:
weight
:
0.5
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
hidden_states_8
name
:
"
loss_8"
Optimizer
:
name
:
AdamW
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Linear
learning_rate
:
0.00005
epochs
:
*epoch_num
warmup_epoch
:
10
regularizer
:
name
:
L2
factor
:
0.00000
PostProcess
:
name
:
DistillationSerPostProcess
model_name
:
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
class_path
:
&class_path
train_data/XFUND/class_list_xfun.txt
Metric
:
name
:
DistillationMetric
base_metric_name
:
VQASerTokenMetric
main_indicator
:
hmean
key
:
"
Student"
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_train/image
label_file_list
:
-
train_data/XFUND/zh_train/train.json
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
# one of [None, "tb-yx"]
order_method
:
&order_method
"
tb-yx"
-
VQATokenPad
:
max_seq_len
:
&max_seq_len
512
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
4
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
train_data/XFUND/zh_val/image
label_file_list
:
-
train_data/XFUND/zh_val/val.json
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
VQATokenLabelEncode
:
# Class handling label
contains_re
:
False
algorithm
:
*algorithm
class_path
:
*class_path
order_method
:
*order_method
-
VQATokenPad
:
max_seq_len
:
*max_seq_len
return_attention_mask
:
True
-
VQASerTokenChunk
:
max_seq_len
:
*max_seq_len
-
Resize
:
size
:
[
224
,
224
]
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
input_ids'
,
'
bbox'
,
'
attention_mask'
,
'
token_type_ids'
,
'
image'
,
'
labels'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
4
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v3
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
800
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec_ppocr_v3_distillation
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/ppocr_keys_v1.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_distillation.txt
d2s_train_image_shape
:
[
3
,
48
,
-1
]
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
decay_epochs
:
[
700
]
values
:
[
0.0005
,
0.00005
]
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
&model_type
"
rec"
name
:
DistillationModel
algorithm
:
Distillation
Models
:
Teacher
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Student
:
pretrained
:
freeze_params
:
false
return_all_feats
:
true
model_type
:
*model_type
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
CombinedLoss
loss_config_list
:
-
DistillationDMLLoss
:
weight
:
1.0
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
head_out
multi_head
:
True
dis_head
:
ctc
name
:
dml_ctc
-
DistillationDMLLoss
:
weight
:
0.5
act
:
"
softmax"
use_log
:
true
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
head_out
multi_head
:
True
dis_head
:
sar
name
:
dml_sar
-
DistillationDistanceLoss
:
weight
:
1.0
mode
:
"
l2"
model_name_pairs
:
-
[
"
Student"
,
"
Teacher"
]
key
:
backbone_out
-
DistillationCTCLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
multi_head
:
True
-
DistillationSARLoss
:
weight
:
1.0
model_name_list
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
multi_head
:
True
PostProcess
:
name
:
DistillationCTCLabelDecode
model_name
:
[
"
Student"
,
"
Teacher"
]
key
:
head_out
multi_head
:
True
Metric
:
name
:
DistillationMetric
base_metric_name
:
RecMetric
main_indicator
:
acc
key
:
"
Student"
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_en_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/en_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_en.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
max_text_length
:
*max_text_length
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/.gitkeep
0 → 100644
View file @
4824c25b
configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_arabic_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
./doc/imgs_words/arabic/ar_2.jpg
character_dict_path
:
ppocr/utils/dict/arabic_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_arabic.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/chinese_cht_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_chinese_cht_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/chinese_cht_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_chinese_cht.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/cyrillic_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_cyrillic_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/cyrillic_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_cyrillic.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/devanagari_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_devanagari_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/devanagari_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_devanagari.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/japan_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_japan_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/japan_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_japan.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
configs/rec/PP-OCRv3/multi_language/ka_PP-OCRv3_rec.yml
0 → 100644
View file @
4824c25b
Global
:
debug
:
false
use_gpu
:
true
epoch_num
:
500
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/v3_ka_mobile
save_epoch_step
:
3
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
true
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
false
infer_img
:
doc/imgs_words/ch/word_1.jpg
character_dict_path
:
ppocr/utils/dict/ka_dict.txt
max_text_length
:
&max_text_length
25
infer_mode
:
false
use_space_char
:
true
distributed
:
true
save_res_path
:
./output/rec/predicts_ppocrv3_ka.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Cosine
learning_rate
:
0.001
warmup_epoch
:
5
regularizer
:
name
:
L2
factor
:
3.0e-05
Architecture
:
model_type
:
rec
algorithm
:
SVTR_LCNet
Transform
:
Backbone
:
name
:
MobileNetV1Enhance
scale
:
0.5
last_conv_stride
:
[
1
,
2
]
last_pool_type
:
avg
last_pool_kernel_size
:
[
2
,
2
]
Head
:
name
:
MultiHead
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
64
depth
:
2
hidden_dims
:
120
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
SARHead
:
enc_dim
:
512
max_text_length
:
*max_text_length
Loss
:
name
:
MultiLoss
loss_config_list
:
-
CTCLoss
:
-
SARLoss
:
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
ignore_space
:
False
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
ext_op_transform_idx
:
1
label_file_list
:
-
./train_data/train_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
RecConAug
:
prob
:
0.5
ext_data_num
:
2
image_shape
:
[
48
,
320
,
3
]
-
RecAug
:
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
true
batch_size_per_card
:
128
drop_last
:
true
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data
label_file_list
:
-
./train_data/val_list.txt
transforms
:
-
DecodeImage
:
img_mode
:
BGR
channel_first
:
false
-
MultiLabelEncode
:
-
RecResizeImg
:
image_shape
:
[
3
,
48
,
320
]
-
KeepKeys
:
keep_keys
:
-
image
-
label_ctc
-
label_sar
-
length
-
valid_ratio
loader
:
shuffle
:
false
drop_last
:
false
batch_size_per_card
:
128
num_workers
:
4
Prev
1
…
11
12
13
14
15
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment