# Top-level settings for a DETR-architecture detector.
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
norm_type: sync_bn

# Exponential-moving-average options.
use_ema: True
ema_decay: 0.9999
ema_decay_type: 'exponential'
ema_filter_no_grad: True

# Same width (256) is repeated in HybridEncoder.hidden_dim and
# HybridEncoder.encoder_layer.d_model below.
hidden_dim: 256
use_focal_loss: True
eval_size: [640, 640]

# Component wiring: every value here names one of the top-level
# sections configured further down in this file.
DETR:
  backbone: ResNet
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

ResNet:
  # index 0 stands for res2
  # NOTE(review): presumably this refers to the stage indices used by
  # freeze_at / return_idx below -- confirm against the backbone code.
  depth: 50
  variant: d
  norm_type: bn
  freeze_at: 0
  return_idx: [1, 2, 3]
  # Four entries, matching num_stages: 4.
  lr_mult_list: [0.1, 0.1, 0.1, 0.1]
  num_stages: 4
  freeze_stem_only: True

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.0
    activation: 'gelu'
  expansion: 1.0

RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  # Three strides, matching num_levels: 3 and the three entries of
  # ResNet.return_idx.
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  # Denoising-related options.
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: False

DINOHead:
  loss:
    name: DINOLoss
    # Per-term loss weights.
    loss_coeff:
      class: 1
      bbox: 5
      giou: 2
    aux_loss: True
    use_vfl: True
    matcher:
      name: HungarianMatcher
      # Per-term matcher weights; note class is 2 here versus 1 in
      # loss_coeff above.
      matcher_coeff:
        class: 2
        bbox: 5
        giou: 2

DETRPostProcess:
  # Same value as RTDETRTransformer.num_queries (300).
  num_top_queries: 300