# table_master.yml
# Run-wide settings: device, schedule length, logging cadence, I/O paths and
# the shared constants that other sections reference through YAML aliases.
Global:
  use_gpu: true
  epoch_num: 17
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: ./output/table_master/
  save_epoch_step: 17
  # Presumably [first step, interval in steps] -- confirm against the trainer.
  eval_batch_step: [0, 6259]
  cal_metric_during_train: true
  pretrained_model: null
  # No checkpoint is configured; null is spelled out instead of a bare key.
  checkpoints: null
  save_inference_dir: output/table_master/infer
  use_visualdl: false
  infer_img: ppstructure/docs/table/table.jpg
  save_res_path: ./output/table_master
  character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
  infer_mode: false
  # Anchored here; aliased by Architecture.Head and TableMasterLabelEncode.
  max_text_length: &max_text_length 500
  # Anchored here; aliased by Metric and TableBoxEncode.
  # Listed options: 'xywh', 'xyxy', 'xyxyxyxy'.
  box_format: &box_format 'xywh'
  d2s_train_image_shape: [3, 480, 480]


# Optimizer: Adam with a multi-step learning-rate schedule and an L2
# regularizer whose factor is zero.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: MultiStepDecay
    learning_rate: 0.001
    # Decay points and multiplier; units presumably epochs -- confirm.
    milestones: [12, 15]
    gamma: 0.1
    # Fractional value; presumably a fraction of one epoch -- confirm.
    warmup_epoch: 0.02
  regularizer:
    name: L2
    factor: 0.0

# Model definition: TableMaster algorithm with a TableResNetExtra backbone
# and a TableMasterHead.
Architecture:
  model_type: table
  algorithm: TableMaster
  Backbone:
    name: TableResNetExtra
    gcb_config:
      ratio: 0.0625
      headers: 1
      att_scale: false
      fusion_type: channel_add
      # One flag per entry; presumably one per backbone stage -- confirm.
      layers: [false, true, true, true]
    layers: [1, 2, 5, 3]
  Head:
    name: TableMasterHead
    hidden_size: 512
    headers: 8
    dropout: 0
    d_ff: 2024
    # Alias of the value anchored in Global.
    max_text_length: *max_text_length
    # Anchored here; aliased by TableMasterLabelEncode in Train and Eval.
    loc_reg_num: &loc_reg_num 4

# Loss used for training.
Loss:
  name: TableMasterLoss
  # Set to len of dict + 3.
  ignore_index: 42

# Decoding of model output.
PostProcess:
  name: TableMasterLabelDecode
  box_shape: pad
  # Anchored here; aliased by TableMasterLabelEncode in Train and Eval so
  # that encoding and decoding share the same setting.
  merge_no_span_structure: &merge_no_span_structure true

# Evaluation metric; `acc` is the main indicator.
Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false
  # Alias of the box format anchored in Global.
  box_format: *box_format

# Training data: PubTabDataSet over the PubTabNet train split, followed by
# the data-loader settings.
Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/train/
    label_file_list:
      - train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
    # Transform list; presumably applied in the order written -- confirm.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: true
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - NormalizeImage:
          # A string, not a YAML float; quoted to make that explicit.
          scale: '1./255.'
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - shape
  loader:
    shuffle: true
    batch_size_per_card: 10
    drop_last: true
    num_workers: 8

# Evaluation data: PubTabDataSet over the PubTabNet val split, with the same
# transform list as Train and a non-shuffling loader.
Eval:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/val/
    label_file_list:
      - train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
    # Transform list; presumably applied in the order written -- confirm.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: true
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - NormalizeImage:
          # A string, not a YAML float; quoted to make that explicit.
          scale: '1./255.'
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - shape
  loader:
    shuffle: false
    batch_size_per_card: 10
    drop_last: false
    num_workers: 8