Commit 4824c25b authored by wangsen's avatar wangsen
Browse files

Initial commit

parents
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_devanagari_lite
save_epoch_step: 3
eval_batch_step:
- 0
- 2000
cal_metric_during_train: true
pretrained_model: null
checkpoints: null
save_inference_dir: null
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: L2
factor: 1.0e-05
Architecture:
model_type: rec
algorithm: CRNN
Transform: null
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride:
- 1
- 2
- 2
- 2
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 1.0e-05
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/devanagari_train.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug: null
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 256
drop_last: true
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/devanagari_val.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_en_number_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: True
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_french_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/french_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_german_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/german_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_japan_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/japan_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_korean_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/korean_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_latin_lite
save_epoch_step: 3
eval_batch_step:
- 0
- 2000
cal_metric_during_train: true
pretrained_model: null
checkpoints: null
save_inference_dir: null
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/latin_dict.txt
max_text_length: 25
infer_mode: false
use_space_char: true
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: L2
factor: 1.0e-05
Architecture:
model_type: rec
algorithm: CRNN
Transform: null
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride:
- 1
- 2
- 2
- 2
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 1.0e-05
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/latin_train.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug: null
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 256
drop_last: true
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/latin_val.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_multi_language_lite
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path:
# Set the language of training, if set, select the default dictionary file
character_type:
max_text_length: 25
infer_mode: False
use_space_char: True
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list: ["./train_data/train_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- RecAug:
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list: ["./train_data/val_list.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 240
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/can/
save_epoch_step: 1
# evaluation is run every 1105 iterations (1 epoch)(batch_size = 8)
eval_batch_step: [0, 1105]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/datasets/crohme_demo/hme_00.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
max_text_length: 36
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_can.txt
Optimizer:
name: Momentum
momentum: 0.9
clip_norm_global: 100.0
lr:
name: TwoStepCosine
learning_rate: 0.01
warmup_epoch: 1
weight_decay: 0.0001
Architecture:
model_type: rec
algorithm: CAN
in_channels: 1
Transform:
Backbone:
name: DenseNet
growthRate: 24
reduction: 0.5
bottleneck: True
use_dropout: True
input_channel: 1
Head:
name: CANHead
in_channel: 684
out_channel: 111
max_text_length: 36
ratio: 16
attdecoder:
is_train: True
input_size: 256
hidden_size: 256
encoder_out_channel: 684
dropout: True
dropout_ratio: 0.5
word_num: 111
counting_decoder_out_channel: 111
attention:
attention_dim: 512
word_conv_kernel: 1
Loss:
name: CANLoss
PostProcess:
name: CANLabelDecode
Metric:
name: CANMetric
main_indicator: exp_rate
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME/training/images/
label_file_list: ["./train_data/CROHME/training/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: True
batch_size_per_card: 8
drop_last: False
num_workers: 4
collate_fn: DyMaskCollator
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/CROHME/evaluation/images/
label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
transforms:
- DecodeImage:
channel_first: False
- NormalizeImage:
mean: [0,0,0]
std: [1,1,1]
order: 'hwc'
- GrayImageChannelFormat:
inverse: True
- CANLabelEncode:
lower: False
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1
num_workers: 4
collate_fn: DyMaskCollator
Global:
use_gpu: True
epoch_num: 8
log_smooth_window: 20
print_batch_step: 5
save_model_dir: ./output/rec/pren_new
save_epoch_step: 3
# evaluation is run every 2000 iterations after the 4000th iteration
eval_batch_step: [4000, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: &max_text_length 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_pren.txt
Optimizer:
name: Adadelta
lr:
name: Piecewise
decay_epochs: [2, 5, 7]
values: [0.5, 0.1, 0.01, 0.001]
Architecture:
model_type: rec
algorithm: PREN
in_channels: 3
Backbone:
name: EfficientNetb3_PREN
Neck:
name: PRENFPN
n_r: 5
d_model: 384
max_len: *max_text_length
dropout: 0.1
Head:
name: PRENHead
Loss:
name: PRENLoss
PostProcess:
name: PRENLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- PRENLabelEncode:
- RecAug:
- PRENResizeImg:
image_shape: [64, 256] # h,w
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: True
batch_size_per_card: 128
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- PRENLabelEncode:
- PRENResizeImg:
image_shape: [64, 256] # h,w
- KeepKeys:
keep_keys: ['image', 'label']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 64
num_workers: 8
Global:
use_gpu: true
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/ic15/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir: ./
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_ic15.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 96
Head:
name: CTCHead
fc_decay: 0
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
use_shared_memory: False
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
use_shared_memory: False
Global:
use_gpu: True
epoch_num: 21
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/nrtr/
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/EN_symbol_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_nrtr.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.99
clip_norm: 5.0
lr:
name: Cosine
learning_rate: 0.0005
warmup_epoch: 2
regularizer:
name: 'L2'
factor: 0.
Architecture:
model_type: rec
algorithm: NRTR
in_channels: 1
Transform:
Backbone:
name: MTB
cnn_num: 2
Head:
name: Transformer
d_model: 512
num_encoder_layers: 6
beam_size: -1 # When Beam size is greater than 0, it means to use beam search when evaluation.
Loss:
name: CELoss
smoothing: True
PostProcess:
name: NRTRLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- NRTRLabelEncode: # Class handling label
- GrayRecResizeImg:
image_shape: [100, 32] # W H
resize_type: PIL # PIL or OpenCV
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 512
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/evaluation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- NRTRLabelEncode: # Class handling label
- GrayRecResizeImg:
image_shape: [100, 32] # W H
resize_type: PIL # PIL or OpenCV
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
use_shared_memory: False
Global:
use_gpu: True
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_mv3_none_bilstm_ctc.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 96
Head:
name: CTCHead
fc_decay: 0
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
Global:
use_gpu: True
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/mv3_none_none_ctc/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_mv3_none_none_ctc.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: Rosetta
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
Neck:
name: SequenceEncoder
encoder_type: reshape
Head:
name: CTCHead
fc_decay: 0.0004
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: True
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/rec_mv3_tps_bilstm_att/
save_epoch_step: 3
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
model_type: rec
algorithm: RARE
Transform:
name: TPS
num_fiducial: 20
loc_lr: 0.1
model_name: small
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 96
Head:
name: AttentionHead
hidden_size: 96
Loss:
name: AttentionLoss
PostProcess:
name: AttnLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- AttnLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- AttnLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 1
Global:
use_gpu: True
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/mv3_tps_bilstm_ctc/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_ctc.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: STARNet
Transform:
name: TPS
num_fiducial: 20
loc_lr: 0.1
model_name: small
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 96
Head:
name: CTCHead
fc_decay: 0.0004
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
Global:
use_gpu: true
epoch_num: 5
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./output/rec/rec_r31_robustscanner/
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
max_text_length: &max_text_length 40
infer_mode: False
use_space_char: False
rm_symbol: True
save_res_path: ./output/rec/predicts_robustscanner.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Piecewise
decay_epochs: [3, 4]
values: [0.001, 0.0001, 0.00001]
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: RobustScanner
Transform:
Backbone:
name: ResNet31
init_type: KaimingNormal
Head:
name: RobustScannerHead
enc_outchannles: 128
hybrid_dec_rnn_layers: 2
hybrid_dec_dropout: 0
position_dec_rnn_layers: 2
start_idx: 91
mask: True
padding_idx: 92
encode_value: False
max_text_length: *max_text_length
Loss:
name: SARLoss
PostProcess:
name: SARLabelDecode
Metric:
name: RecMetric
is_filter: True
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- RobustScannerRecResizeImg:
image_shape: [3, 48, 48, 160] # h:48 w:[48,160]
width_downsample_ratio: 0.25
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio', 'word_positons'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 64
drop_last: True
num_workers: 8
use_shared_memory: False
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/evaluation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- RobustScannerRecResizeImg:
image_shape: [3, 48, 48, 160]
max_text_length: *max_text_length
width_downsample_ratio: 0.25
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio', 'word_positons'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 64
num_workers: 4
use_shared_memory: False
Global:
use_gpu: true
epoch_num: 5
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./sar_rec
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
max_text_length: 30
infer_mode: False
use_space_char: False
rm_symbol: True
save_res_path: ./output/rec/predicts_sar.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Piecewise
decay_epochs: [3, 4]
values: [0.001, 0.0001, 0.00001]
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: SAR
Transform:
Backbone:
name: ResNet31
Head:
name: SARHead
Loss:
name: SARLoss
PostProcess:
name: SARLabelDecode
Metric:
name: RecMetric
Train:
dataset:
name: SimpleDataSet
label_file_list: ['./train_data/train_list.txt']
data_dir: ./train_data/
ratio_list: 1.0
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- SARRecResizeImg:
image_shape: [3, 48, 48, 160] # h:48 w:[48,160]
width_downsample_ratio: 0.25
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 64
drop_last: True
num_workers: 8
use_shared_memory: False
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/evaluation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- SARRecResizeImg:
image_shape: [3, 48, 48, 160]
width_downsample_ratio: 0.25
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 64
num_workers: 4
use_shared_memory: False
Global:
use_gpu: True
epoch_num: 6
log_smooth_window: 50
print_batch_step: 50
save_model_dir: ./output/rec/rec_r32_gaspin_bilstm_att/
save_epoch_step: 3
# evaluation is run every 2000 iterations after the 4000th iteration
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ./ppocr/utils/dict/spin_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_r32_gaspin_bilstm_att.txt
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
lr:
name: Piecewise
decay_epochs: [3, 4, 5]
values: [0.001, 0.0003, 0.00009, 0.000027]
clip_norm: 5
Architecture:
model_type: rec
algorithm: SPIN
in_channels: 1
Transform:
name: GA_SPIN
offsets: True
default_type: 6
loc_lr: 0.1
stn: True
Backbone:
name: ResNet32
out_channels: 512
Neck:
name: SequenceEncoder
encoder_type: cascadernn
hidden_size: 256
out_channels: [256, 512]
with_linear: True
Head:
name: SPINAttentionHead
hidden_size: 256
Loss:
name: SPINAttentionLoss
ignore_index: 0
PostProcess:
name: SPINLabelDecode
use_space_char: False
Metric:
name: RecMetric
main_indicator: acc
is_filter: True
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SPINLabelEncode: # Class handling label
- SPINRecResizeImg:
image_shape: [100, 32]
interpolation : 2
mean: [127.5]
std: [127.5]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 8
drop_last: True
num_workers: 4
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SPINLabelEncode: # Class handling label
- SPINRecResizeImg:
image_shape: [100, 32]
interpolation : 2
mean: [127.5]
std: [127.5]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 8
num_workers: 2
Global:
use_gpu: true
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/r34_vd_none_bilstm_ctc/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_r34_vd_none_bilstm_ctc.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: ResNet
layers: 34
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 256
Head:
name: CTCHead
fc_decay: 0
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment