# layoutlm.yml
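# PaddleOCR configuration for fine-tuning LayoutLM on semantic entity
# recognition (SER) over the Chinese subset of the XFUND form-understanding
# dataset. Assuming the standard PaddleOCR layout, training would typically be
# launched with something like:
#   python3 tools/train.py -c configs/vqa/ser/layoutlm.yml
# (the config path above is an assumption; adjust it to where this file lives).
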
Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_layoutlm/
  save_epoch_step: 2000
  # evaluation is run every 19 iterations after the 0th iteration
  eval_batch_step: [ 0, 19 ]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/vqa/input/zh_val_0.jpg
  save_res_path: ./output/ser/predicts_layoutlm.txt

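# Model definition: a LayoutLM backbone with a token-classification head for
# SER. `pretrained: True` loads pretrained LayoutLM weights; `num_classes` is
# anchored here and reused by the loss below.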
Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutLM"
  Transform:
  Backbone:
    name: LayoutLMForSer
    pretrained: True
    checkpoints:
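    # 7 classes: presumably "O" plus B-/I- tags for the three XFUND entity
    # types (QUESTION, ANSWER, HEADER); the exact label set comes from the
    # labels_ser.txt file referenced under PostProcess below.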
    num_classes: &num_classes 7

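# Token-level classification loss over the `num_classes` SER labels,
# referenced via the YAML anchor defined in the backbone above.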
Loss:
  name: VQASerTokenLayoutLMLoss
  num_classes: *num_classes

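# AdamW with a linear learning-rate schedule: 2 warmup epochs, then decay over
# the remaining epochs. The L2 regularizer factor of 0 effectively disables
# weight decay through the regularizer.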
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 0.00005
    epochs: *epoch_num
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.00000
    
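# Decoding: maps predicted token class ids back to the SER label names listed
# in labels_ser.txt (the `class_path` anchor is also reused by the label
# encoder in the transform pipelines below).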
PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  class_path: &class_path ppstructure/vqa/labels/labels_ser.txt

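# `hmean` is the harmonic mean of precision and recall, i.e. the F1 score.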
Metric:
  name: VQASerTokenMetric
  main_indicator: hmean

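# Training data: XFUND-zh images with normalized JSON annotations. Each sample
# is decoded, tokenized and label-encoded, padded and chunked to a 512-token
# sequence, and the page image is resized to 224x224 and normalized.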
Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
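      # ImageNet mean/std expressed on the 0-255 pixel scale
      # (e.g. 123.675 = 0.485 * 255); `scale: 1` leaves pixel values unscaled
      # before the mean/std normalization.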
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids'] # the dataloader returns the batch fields in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4

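# Evaluation mirrors the training pipeline (same transforms and label set) but
# reads the XFUND-zh validation split and disables shuffling.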
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids'] # the dataloader returns the batch fields in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4