SLANet.yml 3.73 KB
Newer Older
sunzhq2's avatar
sunzhq2 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
Global:
  use_gpu: true
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/SLANet
  save_epoch_step: 400
  # evaluation is run every 1000 iterations after the 0th iteration
  eval_batch_step: [0, 1000]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
  save_inference_dir: ./output/SLANet/infer
  use_visualdl: False
  infer_img: ppstructure/docs/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
  character_type: en
  max_text_length: &max_text_length 500
  box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
  infer_mode: False
  use_sync_bn: True
  save_res_path: 'output/infer'
  d2s_train_image_shape: [3, -1, -1]
  amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  lr:
    name: Piecewise
    learning_rate: 0.001
    decay_epochs : [40, 50]
    values : [0.001, 0.0001, 0.00005]
  regularizer:
    name: 'L2'
    factor: 0.00000

Architecture:
  model_type: table
  algorithm: SLANet
  Backbone:
    name: PPLCNet
    scale: 1.0
    pretrained: true
    use_ssld: true
  Neck:
    name: CSPPAN
    out_channels: 96
  Head:
    name: SLAHead
    hidden_size: 256
    max_text_length: *max_text_length
    loc_reg_num: &loc_reg_num 4

Loss:
  name: SLALoss
  structure_weight: 1.0
  loc_weight: 2.0
  loc_loss: smooth_l1

PostProcess:
  name: TableLabelDecode
  merge_no_span_structure: &merge_no_span_structure True

Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: False
  loc_reg_num: *loc_reg_num
  box_format: *box_format

Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/train/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - TableLabelEncode:
          learn_empty_box: False
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: False
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
  loader:
    shuffle: True
    batch_size_per_card: 48
    drop_last: True
    num_workers: 1

Eval:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/val/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - TableLabelEncode:
          learn_empty_box: False
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: False
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 48
    num_workers: 1