rtdetr_r50vd.yml 1.38 KB
Newer Older
wangkx1's avatar
wangkx1 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
norm_type: sync_bn
use_ema: True
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: True
hidden_dim: 256
use_focal_loss: True
eval_size: [640, 640]


DETR:
  backbone: ResNet
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  variant: d
  norm_type: bn
  freeze_at: 0
  return_idx: [1, 2, 3]
  lr_mult_list: [0.1, 0.1, 0.1, 0.1]
  num_stages: 4
  freeze_stem_only: True

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  expansion: 1.0


RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: False

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: True
    use_vfl: True
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300