# rec_svtrnet_ch.yml
# Committed by: wangsen
# Run-wide settings. Keys are grouped by purpose; the grouping is cosmetic
# only, since a YAML mapping carries no ordering semantics.
Global:
  # --- device and run length ---
  use_gpu: true
  epoch_num: 100

  # --- console / dashboard logging ---
  log_smooth_window: 20
  print_batch_step: 10
  use_visualdl: false

  # --- model output and warm-start paths (null = not set) ---
  save_model_dir: ./output/rec/svtr_ch_all/
  save_epoch_step: 10
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null

  # --- evaluation while training ---
  # Two-element list; presumably [first_iteration, interval_in_iterations] --
  # TODO confirm against the PaddleOCR configuration docs.
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true

  # --- character dictionary and label limits ---
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
  max_text_length: 25
  use_space_char: true

  # --- single-image inference ---
  infer_img: doc/imgs_words/ch/word_1.jpg
  infer_mode: false
  save_res_path: ./output/rec/predicts_svtr_tiny_ch_all.txt
# Optimizer selection (AdamW) together with its learning-rate schedule.
Optimizer:
  name: AdamW

  # Learning-rate schedule: Cosine, base rate 0.0005, with a 2-epoch warm-up.
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 2

  # Optimizer hyper-parameters.
  beta1: 0.9
  beta2: 0.99
  epsilon: 8.0e-08
  weight_decay: 0.05

  # A single space-separated string, not a YAML list; quoted to make that
  # explicit. Presumably name fragments of parameters that are exempt from
  # weight decay -- TODO confirm against the optimizer builder.
  no_weight_decay_name: 'norm pos_embed'
  one_dim_param_no_weight_decay: true
# Model definition: algorithm SVTR with model_type rec, assembled from a
# Backbone, a Neck and a Head. No Transform stage is configured (null).
Architecture:
  model_type: rec
  algorithm: SVTR
  Transform: null

  Backbone:
    name: SVTRNet
    # Same two numbers as the last two entries of SVTRRecResizeImg.image_shape
    # in the Train/Eval pipelines (3, 32, 320); presumably [height, width] --
    # TODO confirm.
    img_size: [32, 320]
    out_char_num: 40
    out_channels: 96
    patch_merging: Conv

    # Three-entry lists; presumably one value per backbone stage -- TODO confirm.
    embed_dim: [64, 128, 256]
    depth: [3, 6, 3]
    num_heads: [2, 4, 8]

    # 12 entries: 6 x Local followed by 6 x Global. 12 equals the sum of
    # `depth` (3 + 6 + 3), so presumably one mixer type per block -- TODO
    # confirm against the SVTRNet implementation.
    mixer: [Local, Local, Local, Local, Local, Local,
            Global, Global, Global, Global, Global, Global]

    # Three identical [7, 11] pairs.
    local_mixer:
    - [7, 11]
    - [7, 11]
    - [7, 11]

    last_stage: true
    prenorm: false

  Neck:
    name: SequenceEncoder
    encoder_type: reshape

  Head:
    name: CTCHead
# Loss component, selected by name.
Loss:
  name: CTCLoss
# Post-processing component, selected by name.
PostProcess:
  name: CTCLabelDecode
# Metric component. `main_indicator` names the metric field `acc`; presumably
# the value used to rank checkpoints -- TODO confirm against the trainer.
Metric:
  name: RecMetric
  main_indicator: acc
# Training data source, per-sample transform pipeline, and loader settings.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data
    label_file_list: [./train_data/train_list.txt]

    # Entry 1 (0-based) of `transforms` below is RecConAug; presumably this
    # index points at it -- TODO confirm against the dataset implementation.
    ext_op_transform_idx: 1

    # Transforms are listed in order; do not reorder.
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - RecConAug:
        prob: 0.5
        ext_data_num: 2
        # Note the ordering differs from SVTRRecResizeImg.image_shape below
        # (32, 320, 3 here versus 3, 32, 320 there).
        image_shape: [32, 320, 3]
    - RecAug: null
    - CTCLabelEncode: null
    - SVTRRecResizeImg:
        image_shape: [3, 32, 320]
        padding: true
    - KeepKeys:
        keep_keys: [image, label, length]

  loader:
    shuffle: true
    batch_size_per_card: 256
    drop_last: true
    num_workers: 8
# Evaluation data source, transform pipeline, and loader settings.
# Compared with Train, the pipeline has no RecConAug / RecAug entries and the
# loader neither shuffles nor drops the last batch.
Eval:
  dataset:
    name: SimpleDataSet
    # Same data_dir as Train; only the label file differs.
    data_dir: ./train_data
    label_file_list: [./train_data/val_list.txt]

    # Transforms are listed in order; do not reorder.
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - CTCLabelEncode: null
    - SVTRRecResizeImg:
        image_shape: [3, 32, 320]
        padding: true
    - KeepKeys:
        keep_keys: [image, label, length]

  loader:
    shuffle: false
    batch_size_per_card: 256
    drop_last: false
    num_workers: 2
# Profiler options are explicitly unset (null).
profiler_options: null