# rec_mtb_nrtr.yml
# Global run settings: device flag, epoch count, logging cadence, output and
# checkpoint paths, and the character-set options used for label processing.
Global:
  use_gpu: true
  epoch_num: 21
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/nrtr/
  save_epoch_step: 1
  # evaluation is run every 2000 iterations
  # NOTE(review): presumably [start_iteration, interval] - confirm against the trainer.
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
  # The three entries below are intentionally unset (null).
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  # character_dict_path is intentionally unset (null).
  character_dict_path: null
  character_type: EN_symbol
  max_text_length: 25
  infer_mode: false
  use_space_char: true
  save_res_path: ./output/rec/predicts_nrtr.txt

# Optimizer settings: Adam with gradient-norm clipping, a Cosine learning-rate
# entry with warmup, and an L2 regularizer entry whose factor is zero.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.99
  clip_norm: 5.0
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.0

# Model definition: a `rec` model using the NRTR algorithm, with an MTB
# backbone and a Transformer head, taking single-channel input.
Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 1
  # No Transform stage is configured (null).
  Transform: null
  Backbone:
    name: MTB
    cnn_num: 2
  Head:
    name: Transformer
    d_model: 512
    num_encoder_layers: 6
    # When beam_size is greater than 0, beam search is used during evaluation.
    beam_size: 10

# Training loss: NRTRLoss with its `smoothing` option switched on.
Loss:
  name: NRTRLoss
  smoothing: true

# Post-processing step applied to model output, selected by name.
PostProcess:
  name: NRTRLabelDecode

# Evaluation metric, selected by name; `acc` is the main indicator.
# NOTE(review): presumably main_indicator picks the value used to rank
# checkpoints - confirm against the trainer.
Metric:
  name: RecMetric
  main_indicator: acc

# Training data pipeline: LMDB dataset, per-sample transforms (applied in the
# listed order), and data-loader settings.
Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    transforms:
      - NRTRDecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      # Class handling label; takes no parameters here.
      - NRTRLabelEncode: null
      - NRTRRecResizeImg:
          image_shape: [100, 32]
          resize_type: PIL  # PIL or OpenCV
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: true
    batch_size_per_card: 512
    drop_last: true
    num_workers: 8

# Evaluation data pipeline: LMDB dataset, the same transform chain as used for
# training, and data-loader settings (no shuffling, no dropped last batch).
Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      - NRTRDecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      # Class handling label; takes no parameters here.
      - NRTRLabelEncode: null
      - NRTRRecResizeImg:
          image_shape: [100, 32]
          resize_type: PIL  # PIL or OpenCV
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    num_workers: 1
    use_shared_memory: false