Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
21d9f185
Commit
21d9f185
authored
Feb 24, 2025
by
zhe chen
Browse files
Release detection model
parent
4dba489c
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
878 additions
and
3 deletions
+878
-3
detection/README.md
detection/README.md
+1
-0
detection/configs/coco/README.md
detection/configs/coco/README.md
+6
-0
detection/configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
...gs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
+188
-0
detection/mmdet_custom/models/__init__.py
detection/mmdet_custom/models/__init__.py
+1
-0
detection/mmdet_custom/models/backbones/__init__.py
detection/mmdet_custom/models/backbones/__init__.py
+2
-1
detection/mmdet_custom/models/backbones/cbnet.py
detection/mmdet_custom/models/backbones/cbnet.py
+152
-0
detection/mmdet_custom/models/dense_heads/__init__.py
detection/mmdet_custom/models/dense_heads/__init__.py
+2
-1
detection/mmdet_custom/models/dense_heads/cbdino_head.py
detection/mmdet_custom/models/dense_heads/cbdino_head.py
+398
-0
detection/mmdet_custom/models/detectors/__init__.py
detection/mmdet_custom/models/detectors/__init__.py
+2
-1
detection/mmdet_custom/models/detectors/cbnet_dino.py
detection/mmdet_custom/models/detectors/cbnet_dino.py
+98
-0
detection/mmdet_custom/models/necks/__init__.py
detection/mmdet_custom/models/necks/__init__.py
+3
-0
detection/mmdet_custom/models/necks/cbnet_channel_mapper.py
detection/mmdet_custom/models/necks/cbnet_channel_mapper.py
+25
-0
No files found.
detection/README.md
View file @
21d9f185
...
...
@@ -67,6 +67,7 @@ pip install opencv-python termcolor yacs pyyaml scipy
# Please use a version of numpy lower than 2.0
pip
install
numpy
==
1.26.4
pip
install
pydantic
==
1.10.13
pip
install
yapf
==
0.40.1
```
-
Compile CUDA operators
...
...
detection/configs/coco/README.md
View file @
21d9f185
...
...
@@ -47,3 +47,9 @@ Based on community feedback, in 2017 the training/validation split was changed f
| InternImage-T | layer-wise lr | ImageNet-1K | 1x | 53.9 | 9.5h | 49M |
[
config
](
./dino_4scale_internimage_t_1x_coco_layer_wise_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.json
)
|
| InternImage-L | layer-wise lr | ImageNet-22K | 1x | 57.5 | 18h | 241M |
[
config
](
./dino_4scale_internimage_l_1x_coco_layer_wise_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_layer_wise_lr.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_layer_wise_lr.log.json
)
|
| InternImage-L | 0.1x backbone lr | ImageNet-22K | 1x | 57.6 | 18h | 241M |
[
config
](
./dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.log.json
)
|
### DINO + CB-InternImage
| backbone | pretrain | box mAP (ss) | box mAP (ms) | #param | Config | Download |
| :--------------: | :--------: | :----------: | :----------: | :----: | :-----------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------: |
| CB-InternImage-H | Objects365 | 64.5 | 65.0 | 2.18B |
[
config
](
./dino_4scale_cbinternimage_h_objects365_coco_ss.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_coco.pth
)
|
detection/configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
0 → 100644
View file @
21d9f185
# MMDetection config: DINO detector with a composite (CBNet-style)
# InternImage-H backbone, fine-tuned on COCO from an Objects365 checkpoint.
# Single-scale (ss) testing variant.
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
# Objects365-pretrained checkpoint remapped to the 80 COCO classes.
load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_80classes.pth'
model = dict(
    type='CBDINO',
    backbone=dict(
        type='CBInternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=True,
        # Two entries: output stages for the first and the second backbone
        # branch of the composite backbone (first branch keeps one extra
        # low-level stage that is later dropped in CBInternImage.forward).
        out_indices=[(0, 1, 2, 3), (1, 2, 3)],
        init_cfg=None,
    ),
    neck=[
        dict(
            type='CBChannelMapper',
            in_channels=[640, 1280, 2560],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='GN', num_groups=32),
            num_outs=4)],
    bbox_head=dict(
        type='CBDINOHead',
        num_query=900,
        num_classes=80,
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None,
                           num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        snip_cfg=dict(type='v3', weight=0.1)),
    test_cfg=dict(max_per_img=300))  # TODO: Originally 100
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1800)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train=dict(filter_empty_gt=True, pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.94,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1e-3))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(interval=200, max_keep_ckpts=3)
evaluation = dict(interval=200, save_best='auto')
# resume_from = None
# custom_hooks = [
#     dict(
#         type='ExpMomentumEMAHook',
#         resume_from=resume_from,
#         momentum=0.0003,
#         priority=49)
# ]
detection/mmdet_custom/models/__init__.py
View file @
21d9f185
...
...
@@ -7,4 +7,5 @@
from
.backbones
import
*
# noqa: F401,F403
from
.dense_heads
import
*
# noqa: F401,F403
from
.detectors
import
*
# noqa: F401,F403
from
.necks
import
*
# noqa: F401,F403
from
.utils
import
*
# noqa: F401,F403
detection/mmdet_custom/models/backbones/__init__.py
View file @
21d9f185
...
...
@@ -4,6 +4,7 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
.cbnet
import
CBInternImage
from
.intern_image
import
InternImage
__all__
=
[
'InternImage'
]
__all__
=
[
'InternImage'
,
'CBInternImage'
]
detection/mmdet_custom/models/backbones/cbnet.py
0 → 100644
View file @
21d9f185
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
constant_init
from
mmcv.runner
import
BaseModule
from
mmdet.models.builder
import
BACKBONES
from
torch.nn.modules.batchnorm
import
_BatchNorm
from
.intern_image
import
InternImage
class LayerScale(nn.Module):
    """Learnable per-channel scaling of a feature map.

    Multiplies the input by a learnable factor ``gamma`` of shape
    ``(dim, 1, 1)`` (one scale per channel, broadcast over the two
    trailing spatial dimensions), initialised to ``init_values``.
    """

    def __init__(self, init_values=0., dim=1024):
        super(LayerScale, self).__init__()
        # One learnable scale per channel; starts at ``init_values``.
        initial = torch.ones((dim, 1, 1)) * init_values
        self.gamma = nn.Parameter(initial, requires_grad=True)

    def forward(self, x):
        # Broadcasted elementwise scaling.
        scaled = x * self.gamma
        return scaled
class _InternImage(InternImage):
    """One branch of the composite (CBNet-style) backbone.

    Extends ``InternImage`` so that per-stage features coming from the
    previous backbone branch (``cb_feats``) can be fused into this
    branch's stages, and so that the stem (``patch_embed``) of assisting
    branches can be deleted and replaced by the lead branch's stem output
    (``pre_tmps``).

    Args:
        cb_idx (int): Index of this branch in the composite backbone.
            Branch 1 gets zero-initialised per-stage fusion scales
            ``gamma0..gamma3``, so fusion starts as an identity.
        **kwargs: Forwarded to ``InternImage.__init__``.
    """

    def __init__(self, cb_idx, **kwargs):
        super(_InternImage, self).__init__(**kwargs)
        self.cb_idx = cb_idx
        # Channel count per stage: channels * 2**i for each level.
        self.num_features_list = [
            int(self.channels * 2 ** i) for i in range(self.num_levels)
        ]
        if cb_idx == 1:
            # Zero-init fusion scales: composite fusion initially a no-op.
            self.gamma0 = nn.Parameter(
                torch.zeros((self.num_features_list[0])), requires_grad=True)
            self.gamma1 = nn.Parameter(
                torch.zeros((self.num_features_list[1])), requires_grad=True)
            self.gamma2 = nn.Parameter(
                torch.zeros((self.num_features_list[2])), requires_grad=True)
            self.gamma3 = nn.Parameter(
                torch.zeros((self.num_features_list[3])), requires_grad=True)

    def del_layers(self, del_stages):
        """Remove the stem of an assisting branch.

        When ``del_stages >= 0`` the branch drops its own ``patch_embed``
        and instead reuses the lead branch's stem output (see ``forward``).
        """
        self.del_stages = del_stages
        if self.del_stages >= 0:
            del self.patch_embed

    def forward(self, x, cb_feats=None, pre_tmps=None):
        """Run the branch, optionally fusing features from a prior branch.

        Args:
            x: Input image batch.
            cb_feats: Optional per-stage feature maps from the previous
                branch, in [B, H, W, C] layout, added (scaled by
                ``gamma{i}``) before each stage.
            pre_tmps: Stem output ``(x, Wh, Ww)`` of the lead branch; used
                when this branch's own ``patch_embed`` has been deleted.

        Returns:
            tuple: ``(outs, tmps)`` where ``outs`` are the [B, C, H, W]
            stage outputs selected by ``self.out_indices`` and ``tmps``
            holds the stem output for reuse by the next branch.
        """
        outs, tmps = [], []
        if hasattr(self, 'patch_embed'):
            # Lead branch: compute the stem and remember its output.
            x = self.patch_embed(x)
            x = self.pos_drop(x)
            Wh, Ww = x.size(1), x.size(2)
            tmps.append((x, Wh, Ww))
        else:
            # Assisting branch: stem was deleted, reuse the lead's output.
            x, Wh, Ww = pre_tmps[0]
        for i, level in enumerate(self.levels):
            if cb_feats is not None:
                gamma = getattr(self, f'gamma{i}')
                # NOTE(review): .half() hard-codes fp16 for the fusion
                # scale — presumably this model is always run in fp16;
                # confirm before reusing in fp32 training.
                x = x + gamma.half() * cb_feats[i]  # [B, H, W, C]
            x, x_ = level(x, return_wo_downsample=True)
            if i in self.out_indices:
                # [B, H, W, C] -> [B, C, H, W] for downstream necks.
                outs.append(x_.permute(0, 3, 1, 2).contiguous())
        return tuple(outs), tmps

    def train(self, mode=True):
        super(_InternImage, self).train(mode)
@BACKBONES.register_module()
class CBInternImage(BaseModule):
    """Composite (CBNet-style) InternImage backbone with two branches.

    The first (lead) branch processes the image normally; its per-stage
    features are projected by 1x1 convs (``cb_linears``), resized, and fed
    into the second (assisting) branch, which reuses the lead branch's
    stem output. Returns one feature tuple per branch.

    Args:
        channels (int): Base channel count of stage 0.
        out_indices (list): Exactly two entries — the output stage indices
            for branch 0 and branch 1 respectively.
        cb_zero_init (bool): Stored flag; not read in this class's visible
            code (fusion scales are zero-initialised in ``_InternImage``).
        cb_del_stages (int): How many stem/early stages of the assisting
            branch to delete / skip when building fusion convs.
        **kwargs: Forwarded to each ``_InternImage`` branch.
    """

    def __init__(self,
                 channels=96,
                 out_indices=None,
                 cb_zero_init=True,
                 cb_del_stages=1,
                 **kwargs):
        super(CBInternImage, self).__init__()
        self.cb_zero_init = cb_zero_init
        self.cb_del_stages = cb_del_stages
        self.out_indices = out_indices
        assert len(out_indices) == 2
        # Build the two backbone branches.
        self.cb_modules = nn.ModuleList()
        for cb_idx in range(2):
            cb_module = _InternImage(
                channels=channels,
                out_indices=out_indices[cb_idx],
                cb_idx=cb_idx,
                **kwargs)
            if cb_idx > 0:
                # Assisting branches drop their own stem.
                cb_module.del_layers(cb_del_stages)
            self.cb_modules.append(cb_module)
        self.num_layers = self.cb_modules[0].num_layers

        # 1x1 projections mapping lead-branch stage j+i channels onto the
        # assisting branch's stage-i channel count (Identity when equal).
        cb_inplanes = [channels * 2 ** i for i in range(self.num_layers)]
        self.cb_linears = nn.ModuleList()
        for i in range(self.num_layers):
            linears = nn.ModuleList()
            if i >= self.cb_del_stages - 1:
                jrange = 4 - i
                for j in range(jrange):
                    if cb_inplanes[i + j] != cb_inplanes[i]:
                        layer = nn.Conv2d(cb_inplanes[i + j],
                                          cb_inplanes[i], 1)
                    else:
                        layer = nn.Identity()
                    linears.append(layer)
            self.cb_linears.append(linears)

    def init_weights(self):
        # Delegate weight init to each branch.
        for m in self.cb_modules:
            m.init_weights()

    def spatial_interpolate(self, x, H, W):
        """Resize ``x`` ([B, C, h, w]) to (H, W) with nearest-neighbour,
        or return it unchanged when already that size."""
        if H != x.shape[2] or W != x.shape[3]:
            x = F.interpolate(x, size=(H, W), mode='nearest')
        return x

    def _get_cb_feats(self, feats, tmps):
        """Fuse the lead branch's stage outputs into per-stage inputs for
        the next branch.

        For stage ``i``, projects and upsamples stages ``i..3`` to stage
        ``i``'s resolution (halved per stage, starting from the stem size
        stored in ``tmps``) and sums them, returned in [B, H, W, C].
        """
        cb_feats = []
        Wh, Ww = tmps[0][1:3]
        for i in range(self.num_layers):
            feed = 0
            if i >= self.cb_del_stages - 1:
                jrange = 4 - i
                for j in range(jrange):
                    tmp = self.cb_linears[i][j](feats[j + i])
                    tmp = self.spatial_interpolate(tmp, Wh, Ww)
                    tmp = tmp.permute(0, 2, 3, 1)  # [B, H, W, C]
                    feed += tmp
            cb_feats.append(feed)
            # Each stage halves the spatial resolution.
            Wh, Ww = Wh // 2, Ww // 2
        return cb_feats

    def forward(self, x):
        """Run both branches; return a tuple with one feature tuple per
        branch. If branch 0 emits one more stage than branch 1 (see the
        config's ``out_indices``), its extra lowest stage is dropped so
        the two outputs align."""
        outs = []
        for i, module in enumerate(self.cb_modules):
            if i == 0:
                feats, tmps = module(x)
            else:
                # Later branches receive fused features + the lead's stem.
                feats, tmps = module(x, cb_feats, tmps)
            outs.append(feats)
            if i < len(self.cb_modules) - 1:
                cb_feats = self._get_cb_feats(outs[-1], tmps)
        if len(self.out_indices[0]) == len(self.out_indices[1]) + 1:
            outs[0] = outs[0][1:]
        return tuple(outs)

    def train(self, mode=True):
        super(CBInternImage, self).train(mode)
        for m in self.cb_modules:
            m.train(mode=mode)
        # Keep any BatchNorm inside the fusion convs frozen in eval mode.
        for m in self.cb_linears.modules():
            if isinstance(m, _BatchNorm):
                m.eval()
detection/mmdet_custom/models/dense_heads/__init__.py
View file @
21d9f185
...
...
@@ -4,8 +4,9 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
.cbdino_head
import
CBDINOHead
from
.deformable_detr_head
import
DeformableDETRHead
from
.detr_head
import
DETRHead
from
.dino_head
import
DINOHead
__all__
=
[
'DeformableDETRHead'
,
'DETRHead'
,
'DINOHead'
]
__all__
=
[
'DeformableDETRHead'
,
'DETRHead'
,
'DINOHead'
,
'CBDINOHead'
]
detection/mmdet_custom/models/dense_heads/cbdino_head.py
0 → 100644
View file @
21d9f185
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmdet.core
import
(
bbox_cxcywh_to_xyxy
,
bbox_xyxy_to_cxcywh
,
multi_apply
,
reduce_mean
)
from
mmdet.models.builder
import
HEADS
from
mmdet.models.utils.transformer
import
inverse_sigmoid
from
..utils
import
build_dn_generator
from
.deformable_detr_head
import
DeformableDETRHead
@HEADS.register_module()
class CBDINOHead(DeformableDETRHead):
    """DINO detection head for the composite (CBNet-style) backbone.

    Extends ``DeformableDETRHead`` with contrastive denoising (CDN)
    queries and with a training path that receives the two composite
    backbone branches stacked along the batch dimension: ``forward_train``
    splits predictions back into per-branch halves and computes a weighted
    loss for each (``cb_first_weight`` for the lead branch, 1.0 for the
    assisting branch).

    Args:
        cb_first_weight (float): Loss weight for the lead branch.
        dn_cfg (dict | None): Config for the CDN query generator.
    """

    def __init__(self, cb_first_weight=0.5, *args, dn_cfg=None, **kwargs):
        super(CBDINOHead, self).__init__(*args, **kwargs)
        self._init_layers()
        self.init_denoising(dn_cfg)
        self.cb_first_weight = cb_first_weight
        assert self.as_two_stage, \
            'as_two_stage must be True for DINO'
        assert self.with_box_refine, \
            'with_box_refine must be True for DINO'

    def _init_layers(self):
        """Add the label embedding used to encode denoising queries."""
        super()._init_layers()
        self.label_embedding = nn.Embedding(self.cls_out_channels,
                                            self.embed_dims)

    def init_denoising(self, dn_cfg):
        """Build the CDN query generator, injecting head-derived sizes."""
        if dn_cfg is not None:
            dn_cfg['num_classes'] = self.num_classes
            dn_cfg['num_queries'] = self.num_query
            dn_cfg['hidden_dim'] = self.embed_dims
        self.dn_generator = build_dn_generator(dn_cfg)

    def upd_loss(self, losses, idx, weight):
        """Suffix loss keys with the branch index and scale loss values.

        Keys gain ``{idx}`` (and ``_w{weight}`` when a non-unit weight is
        applied to a loss term) so the two branches' losses don't collide
        in the final loss dict.
        """
        new_losses = dict()
        for k, v in losses.items():
            new_k = '{}{}'.format(k, idx)
            if weight != 1 and 'loss' in k:
                new_k = '{}_w{}'.format(new_k, weight)
            # Was: isinstance(v, list) or isinstance(v, tuple).
            if isinstance(v, (list, tuple)):
                new_losses[new_k] = [i * weight for i in v]
            else:
                new_losses[new_k] = v * weight
        return new_losses

    def forward_train(self,
                      x,
                      img_metas,
                      gt_bboxes,
                      gt_labels=None,
                      gt_bboxes_ignore=None,
                      proposal_cfg=None,
                      **kwargs):
        """Forward + loss for both composite-backbone branches.

        ``x`` holds both branches stacked on the batch axis (and
        ``img_metas`` / ``gt_*`` are duplicated accordingly by the
        detector). Predictions are split back into the per-branch halves
        and each half is supervised with its own weighted loss.
        """
        assert proposal_cfg is None, '"proposal_cfg" must be None'
        assert self.dn_generator is not None, '"dn_cfg" must be set'
        dn_label_query, dn_bbox_query, attn_mask, dn_meta = \
            self.dn_generator(gt_bboxes, gt_labels, self.label_embedding,
                              img_metas)
        outs = self(x, img_metas, dn_label_query, dn_bbox_query, attn_mask)
        # Split batch dim: element 0 = lead branch, element 1 = assist.
        out1 = tuple(out[..., 0:1, :, :] for out in outs)
        out2 = tuple(out[..., 1:2, :, :] for out in outs)
        if gt_labels is None:
            loss_inputs1 = out1 + ([gt_bboxes[0]], [img_metas[0]], dn_meta)
            loss_inputs2 = out2 + ([gt_bboxes[1]], [img_metas[1]], dn_meta)
        else:
            loss_inputs1 = out1 + ([gt_bboxes[0]], [gt_labels[0]],
                                   [img_metas[0]], dn_meta)
            loss_inputs2 = out2 + ([gt_bboxes[1]], [gt_labels[1]],
                                   [img_metas[1]], dn_meta)
        loss_inputs = (loss_inputs1, loss_inputs2)
        losses = dict()
        # Lead branch down-weighted; assisting branch at full weight.
        loss_weights = [self.cb_first_weight] + [1] * 1
        for i in range(2):
            sub_losses = self.loss(*loss_inputs[i],
                                   gt_bboxes_ignore=gt_bboxes_ignore)
            sub_losses = self.upd_loss(sub_losses, idx=i,
                                       weight=loss_weights[i])
            losses.update(sub_losses)
        return losses

    def forward(self,
                mlvl_feats,
                img_metas,
                dn_label_query=None,
                dn_bbox_query=None,
                attn_mask=None):
        """Run the DINO transformer over multi-level features.

        Returns:
            tuple: ``(outputs_classes, outputs_coords, topk_score,
            topk_anchor)`` — per-decoder-layer class logits and sigmoid
            boxes, plus the encoder's top-k two-stage proposals.
        """
        batch_size = mlvl_feats[0].size(0)
        input_img_h, input_img_w = img_metas[0]['batch_input_shape']
        # Padding mask: 1 on padded pixels, 0 on valid image area.
        img_masks = mlvl_feats[0].new_ones(
            (batch_size, input_img_h, input_img_w))
        for img_id in range(batch_size):
            img_h, img_w, _ = img_metas[img_id]['img_shape']
            img_masks[img_id, :img_h, :img_w] = 0

        mlvl_masks = []
        mlvl_positional_encodings = []
        for feat in mlvl_feats:
            mlvl_masks.append(
                F.interpolate(img_masks[None],
                              size=feat.shape[-2:]).to(torch.bool).squeeze(0))
            mlvl_positional_encodings.append(
                self.positional_encoding(mlvl_masks[-1]))

        query_embeds = None
        hs, inter_references, topk_score, topk_anchor = \
            self.transformer(
                mlvl_feats,
                mlvl_masks,
                query_embeds,
                mlvl_positional_encodings,
                dn_label_query,
                dn_bbox_query,
                attn_mask,
                reg_branches=self.reg_branches if self.with_box_refine else None,  # noqa:E501
                cls_branches=self.cls_branches if self.as_two_stage else None  # noqa:E501
            )
        hs = hs.permute(0, 2, 1, 3)

        if dn_label_query is not None and dn_label_query.size(1) == 0:
            # NOTE: If there is no target in the image, the parameters of
            # label_embedding won't be used in producing loss, which raises
            # RuntimeError when using distributed mode.
            hs[0] += self.label_embedding.weight[0, 0] * 0.0

        outputs_classes = []
        outputs_coords = []
        for lvl in range(hs.shape[0]):
            reference = inter_references[lvl]
            reference = inverse_sigmoid(reference, eps=1e-3)
            outputs_class = self.cls_branches[lvl](hs[lvl])
            tmp = self.reg_branches[lvl](hs[lvl])
            # Refine references: 4-dim refs refine the full box, 2-dim
            # refs only the center.
            if reference.shape[-1] == 4:
                tmp += reference
            else:
                assert reference.shape[-1] == 2
                tmp[..., :2] += reference
            outputs_coord = tmp.sigmoid()
            outputs_classes.append(outputs_class)
            outputs_coords.append(outputs_coord)

        outputs_classes = torch.stack(outputs_classes)
        outputs_coords = torch.stack(outputs_coords)
        return outputs_classes, outputs_coords, topk_score, topk_anchor

    def loss(self,
             all_cls_scores,
             all_bbox_preds,
             enc_topk_scores,
             enc_topk_anchors,
             gt_bboxes_list,
             gt_labels_list,
             img_metas,
             dn_meta=None,
             gt_bboxes_ignore=None):
        """Compute encoder (interm), per-decoder-layer and denoising
        losses; returns a flat loss dict."""
        assert gt_bboxes_ignore is None, \
            f'{self.__class__.__name__} only supports ' \
            f'for gt_bboxes_ignore setting to None.'
        loss_dict = dict()

        # extract denoising and matching part of outputs
        all_cls_scores, all_bbox_preds, dn_cls_scores, dn_bbox_preds = \
            self.extract_dn_outputs(all_cls_scores, all_bbox_preds, dn_meta)

        if enc_topk_scores is not None:
            # calculate loss from encode feature maps
            # NOTE The DeformDETR calculate binary cls loss
            # for all encoder embeddings, while DINO calculate
            # multi-class loss for topk embeddings.
            enc_loss_cls, enc_losses_bbox, enc_losses_iou = \
                self.loss_single(enc_topk_scores, enc_topk_anchors,
                                 gt_bboxes_list, gt_labels_list,
                                 img_metas, gt_bboxes_ignore)

            # collate loss from encode feature maps
            loss_dict['interm_loss_cls'] = enc_loss_cls
            loss_dict['interm_loss_bbox'] = enc_losses_bbox
            loss_dict['interm_loss_iou'] = enc_losses_iou

        # calculate loss from all decoder layers
        num_dec_layers = len(all_cls_scores)
        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]
        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]
        all_gt_bboxes_ignore_list = [
            gt_bboxes_ignore for _ in range(num_dec_layers)
        ]
        img_metas_list = [img_metas for _ in range(num_dec_layers)]
        losses_cls, losses_bbox, losses_iou = multi_apply(
            self.loss_single, all_cls_scores, all_bbox_preds,
            all_gt_bboxes_list, all_gt_labels_list, img_metas_list,
            all_gt_bboxes_ignore_list)

        # collate loss from the last decoder layer
        loss_dict['loss_cls'] = losses_cls[-1]
        loss_dict['loss_bbox'] = losses_bbox[-1]
        loss_dict['loss_iou'] = losses_iou[-1]

        # collate loss from other decoder layers
        num_dec_layer = 0
        for loss_cls_i, loss_bbox_i, loss_iou_i in zip(
                losses_cls[:-1], losses_bbox[:-1], losses_iou[:-1]):
            loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i
            loss_dict[f'd{num_dec_layer}.loss_bbox'] = loss_bbox_i
            loss_dict[f'd{num_dec_layer}.loss_iou'] = loss_iou_i
            num_dec_layer += 1

        if dn_cls_scores is not None:
            # calculate denoising loss from all decoder layers
            dn_meta = [dn_meta for _ in img_metas]
            tmp = self.loss_dn(dn_cls_scores, dn_bbox_preds, gt_bboxes_list,
                               gt_labels_list, img_metas, dn_meta)
            # Debug aid kept from the original: surface the scores when
            # multi_apply unexpectedly returns nothing.
            if len(tmp) == 0:
                print(dn_cls_scores)
            dn_losses_cls, dn_losses_bbox, dn_losses_iou = tmp
            # collate denoising loss
            loss_dict['dn_loss_cls'] = dn_losses_cls[-1]
            loss_dict['dn_loss_bbox'] = dn_losses_bbox[-1]
            loss_dict['dn_loss_iou'] = dn_losses_iou[-1]
            num_dec_layer = 0
            for loss_cls_i, loss_bbox_i, loss_iou_i in zip(
                    dn_losses_cls[:-1], dn_losses_bbox[:-1],
                    dn_losses_iou[:-1]):
                loss_dict[f'd{num_dec_layer}.dn_loss_cls'] = loss_cls_i
                loss_dict[f'd{num_dec_layer}.dn_loss_bbox'] = loss_bbox_i
                loss_dict[f'd{num_dec_layer}.dn_loss_iou'] = loss_iou_i
                num_dec_layer += 1
        # assert len(loss_dict.keys()) == 39, "number of keys must be 39!"
        return loss_dict

    def loss_dn(self, dn_cls_scores, dn_bbox_preds, gt_bboxes_list,
                gt_labels_list, img_metas, dn_meta):
        """Apply ``loss_dn_single`` to every decoder layer's DN outputs."""
        num_dec_layers = len(dn_cls_scores)
        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]
        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]
        img_metas_list = [img_metas for _ in range(num_dec_layers)]
        dn_meta_list = [dn_meta for _ in range(num_dec_layers)]
        return multi_apply(self.loss_dn_single, dn_cls_scores, dn_bbox_preds,
                           all_gt_bboxes_list, all_gt_labels_list,
                           img_metas_list, dn_meta_list)

    def loss_dn_single(self, dn_cls_scores, dn_bbox_preds, gt_bboxes_list,
                       gt_labels_list, img_metas, dn_meta):
        """Denoising loss for a single decoder layer.

        Returns:
            tuple: ``(loss_cls, loss_bbox, loss_iou)``.
        """
        num_imgs = dn_cls_scores.size(0)
        bbox_preds_list = [dn_bbox_preds[i] for i in range(num_imgs)]
        cls_reg_targets = self.get_dn_target(bbox_preds_list, gt_bboxes_list,
                                             gt_labels_list, img_metas,
                                             dn_meta)
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets
        labels = torch.cat(labels_list, 0)
        label_weights = torch.cat(label_weights_list, 0)
        bbox_targets = torch.cat(bbox_targets_list, 0)
        bbox_weights = torch.cat(bbox_weights_list, 0)

        # classification loss
        cls_scores = dn_cls_scores.reshape(-1, self.cls_out_channels)
        # construct weighted avg_factor to match with the official DETR repo
        cls_avg_factor = \
            num_total_pos * 1.0 + num_total_neg * self.bg_cls_weight
        if self.sync_cls_avg_factor:
            cls_avg_factor = reduce_mean(
                cls_scores.new_tensor([cls_avg_factor]))
        cls_avg_factor = max(cls_avg_factor, 1)

        if len(cls_scores) > 0:
            loss_cls = self.loss_cls(cls_scores, labels, label_weights,
                                     avg_factor=cls_avg_factor)
        else:
            loss_cls = torch.zeros(  # TODO: How to better return zero loss
                1,
                dtype=cls_scores.dtype,
                device=cls_scores.device)

        # Compute the average number of gt boxes across all gpus, for
        # normalization purposes
        num_total_pos = loss_cls.new_tensor([num_total_pos])
        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()

        # construct factors used for rescale bboxes
        factors = []
        for img_meta, bbox_pred in zip(img_metas, dn_bbox_preds):
            img_h, img_w, _ = img_meta['img_shape']
            factor = bbox_pred.new_tensor([img_w, img_h, img_w,
                                           img_h]).unsqueeze(0).repeat(
                                               bbox_pred.size(0), 1)
            factors.append(factor)
        factors = torch.cat(factors, 0)

        # DETR regress the relative position of boxes (cxcywh) in the image,
        # thus the learning target is normalized by the image size. So here
        # we need to re-scale them for calculating IoU loss
        bbox_preds = dn_bbox_preds.reshape(-1, 4)
        bboxes = bbox_cxcywh_to_xyxy(bbox_preds) * factors
        bboxes_gt = bbox_cxcywh_to_xyxy(bbox_targets) * factors

        # regression IoU loss, defaultly GIoU loss
        loss_iou = self.loss_iou(bboxes, bboxes_gt, bbox_weights,
                                 avg_factor=num_total_pos)

        # regression L1 loss
        loss_bbox = self.loss_bbox(bbox_preds, bbox_targets, bbox_weights,
                                   avg_factor=num_total_pos)
        return loss_cls, loss_bbox, loss_iou

    def get_dn_target(self, dn_bbox_preds_list, gt_bboxes_list,
                      gt_labels_list, img_metas, dn_meta):
        """Build per-image DN targets and total positive/negative counts."""
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, pos_inds_list, neg_inds_list) = multi_apply(
             self._get_dn_target_single, dn_bbox_preds_list, gt_bboxes_list,
             gt_labels_list, img_metas, dn_meta)
        num_total_pos = sum((inds.numel() for inds in pos_inds_list))
        num_total_neg = sum((inds.numel() for inds in neg_inds_list))
        return (labels_list, label_weights_list, bbox_targets_list,
                bbox_weights_list, num_total_pos, num_total_neg)

    def _get_dn_target_single(self, dn_bbox_pred, gt_bboxes, gt_labels,
                              img_meta, dn_meta):
        """DN targets for one image: positives repeat the GT per group,
        negatives occupy the second half of each group's padding slot."""
        num_groups = dn_meta['num_dn_group']
        pad_size = dn_meta['pad_size']
        assert pad_size % num_groups == 0
        single_pad = pad_size // num_groups
        num_bboxes = dn_bbox_pred.size(0)
        if len(gt_labels) > 0:
            # FIX: torch.range is deprecated (inclusive endpoint, float
            # dtype); torch.arange(n) yields the same 0..n-1 long indices.
            t = torch.arange(0, len(gt_labels)).long().cuda()
            t = t.unsqueeze(0).repeat(num_groups, 1)
            pos_assigned_gt_inds = t.flatten()
            pos_inds = (torch.arange(num_groups) *
                        single_pad).long().cuda().unsqueeze(1) + t
            pos_inds = pos_inds.flatten()
        else:
            pos_inds = pos_assigned_gt_inds = torch.tensor([]).long().cuda()
        neg_inds = pos_inds + single_pad // 2

        # label targets
        labels = gt_bboxes.new_full((num_bboxes, ),
                                    self.num_classes,
                                    dtype=torch.long)
        labels[pos_inds] = gt_labels[pos_assigned_gt_inds]
        label_weights = gt_bboxes.new_ones(num_bboxes)

        # bbox targets
        bbox_targets = torch.zeros_like(dn_bbox_pred)
        bbox_weights = torch.zeros_like(dn_bbox_pred)
        bbox_weights[pos_inds] = 1.0
        img_h, img_w, _ = img_meta['img_shape']

        # DETR regress the relative position of boxes (cxcywh) in the image.
        # Thus the learning target should be normalized by the image size,
        # also the box format should be converted from defaultly x1y1x2y2
        # to cxcywh.
        factor = dn_bbox_pred.new_tensor([img_w, img_h, img_w,
                                          img_h]).unsqueeze(0)
        gt_bboxes_normalized = gt_bboxes / factor
        gt_bboxes_targets = bbox_xyxy_to_cxcywh(gt_bboxes_normalized)
        bbox_targets[pos_inds] = gt_bboxes_targets.repeat([num_groups, 1])

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds)

    @staticmethod
    def extract_dn_outputs(all_cls_scores, all_bbox_preds, dn_meta):
        """Split stacked predictions into (matching, denoising) parts
        along the query axis using ``dn_meta['pad_size']``."""
        # if dn_meta and dn_meta['pad_size'] > 0:
        if dn_meta is not None:
            denoising_cls_scores = all_cls_scores[:, :, :dn_meta['pad_size'], :]
            denoising_bbox_preds = all_bbox_preds[:, :, :dn_meta['pad_size'], :]
            matching_cls_scores = all_cls_scores[:, :, dn_meta['pad_size']:, :]
            matching_bbox_preds = all_bbox_preds[:, :, dn_meta['pad_size']:, :]
        else:
            denoising_cls_scores = None
            denoising_bbox_preds = None
            matching_cls_scores = all_cls_scores
            matching_bbox_preds = all_bbox_preds
        return (matching_cls_scores, matching_bbox_preds,
                denoising_cls_scores, denoising_bbox_preds)

    def tta_test_bboxes(self, feats, img_metas, rescale=False):
        """Test det bboxes without test-time augmentation.

        Args:
            feats (tuple[torch.Tensor]): Multi-level features from the
                upstream network, each is a 4D-tensor.
            img_metas (list[dict]): List of image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.
        """
        # forward of this head requires img_metas
        outs = self.forward(feats, img_metas)
        all_cls_scores, all_bbox_preds, enc_cls_scores, enc_bbox_preds = outs
        cls_scores = all_cls_scores[-1]
        bbox_preds = all_bbox_preds[-1]
        return bbox_preds, cls_scores
detection/mmdet_custom/models/detectors/__init__.py
View file @
21d9f185
...
...
@@ -4,6 +4,7 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
.cbnet_dino
import
CBDINO
from
.dino
import
DINO
__all__
=
[
'DINO'
]
__all__
=
[
'DINO'
,
'CBDINO'
]
detection/mmdet_custom/models/detectors/cbnet_dino.py
0 → 100644
View file @
21d9f185
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmdet.core
import
bbox2result
from
mmdet.models.builder
import
DETECTORS
from
.dino
import
DINO
@DETECTORS.register_module()
class CBDINO(DINO):
    """DINO detector driving the composite (CBNet-style) backbone.

    Extends ``DINO`` with (a) named parameter-freezing rules applied at
    construction time and (b) a training forward that concatenates the
    two backbone branches' features along the batch dimension and
    duplicates the targets, so the head supervises both branches.

    Args:
        rule (str | None): Name of a freezing scheme. NOTE: the rule
            names spell "expect" where "except" is meant — the strings
            are matched verbatim by configs, so they must stay as-is.
        **kwargs: Forwarded to ``DINO.__init__``.
    """

    def __init__(self, rule=None, **kwargs):
        super(CBDINO, self).__init__(**kwargs)
        # Apply the selected freezing rule by matching parameter names.
        for k, v in self.named_parameters():
            if rule == 'freeze_backbone_expect_level_4':
                # Freeze backbone except the assist branch's last stage.
                if 'backbone' in k and \
                        'backbone.cb_modules.1.levels.3' not in k:
                    v.requires_grad = False
            if rule == 'freeze_backbone_expect_0_level_1_2_3':
                # Freeze backbone except stage 0 of each branch.
                # NOTE(review): the cb_modules.2 clause matches nothing in
                # a two-branch backbone — presumably leftover from a
                # three-branch variant.
                if 'backbone' in k and \
                        'backbone.cb_modules.0.levels.0' not in k \
                        and 'backbone.cb_modules.1.levels.0' not in k \
                        and 'backbone.cb_modules.2.levels.0' not in k:
                    v.requires_grad = False
            if rule == 'freeze_backbone_expect_level_3_4':
                # Freeze backbone except the assist branch's stages 2-3.
                if 'backbone' in k and \
                        'backbone.cb_modules.1.levels.2' not in k \
                        and 'backbone.cb_modules.1.levels.3' not in k:
                    v.requires_grad = False
            if rule == 'freeze_cb_first_backbone':
                # Freeze the whole lead branch.
                if 'backbone.cb_modules.0' in k:
                    v.requires_grad = False
            if rule == 'freeze_cb_first_backbone_expect_level_4':
                # Freeze the lead branch except its last stage.
                if 'backbone.cb_modules.0' in k and 'levels.3' not in k:
                    v.requires_grad = False
            if rule == 'freeze_backbone':
                if 'backbone' in k:
                    v.requires_grad = False
            if rule == 'freeze_backbone_encoder':
                if 'backbone' in k or 'encoder' in k:
                    v.requires_grad = False
            if rule == 'freeze_backbone_neck':
                if 'backbone' in k or 'neck' in k:
                    v.requires_grad = False
            if rule == 'freeze_stage_1_2':
                # Freeze the stem and the first two stages of all branches.
                if 'patch_embed' in k:
                    v.requires_grad = False
                if 'levels.0.' in k or 'levels.1.' in k:
                    v.requires_grad = False
            if rule == 'freeze_stage_1':
                if 'patch_embed' in k:
                    v.requires_grad = False
                if 'levels.0.' in k:
                    v.requires_grad = False

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None,
                      loss_weights=None,
                      **kwargs):
        """Train on both composite-backbone branches in one head pass.

        Per-branch feature lists are concatenated along the batch axis
        and the metas/targets duplicated, so the head sees two "images"
        per real image (lead branch first, then the assisting branch).
        """
        batch_input_shape = tuple(img[0].size()[-2:])
        for img_meta in img_metas:
            img_meta['batch_input_shape'] = batch_input_shape
        xs = self.extract_feat(img)
        # x0: x01, x02, x03, x04, x05
        # x1: x11, x12, x13, x14, x15
        if not isinstance(xs[0], (list, tuple)):
            # Plain (non-composite) backbone: wrap as a single branch.
            xs = [xs]
            loss_weights = None
        elif loss_weights is None:
            loss_weights = [0.5] + [1] * (len(xs) - 1)  # [0.5, 1]
        losses = dict()
        # Stack branch features along the batch dimension, level by level.
        # NOTE(review): this line assumes exactly two branches (xs[1]);
        # the single-branch wrapping above would fail here — presumably
        # only the composite backbone is used with this detector.
        new_x = [
            torch.cat((xs[0][i], xs[1][i])) for i in range(len(xs[0]))
        ]
        # Duplicate metas and targets to match the doubled batch.
        img_metas = img_metas + img_metas
        gt_bboxes = gt_bboxes + gt_bboxes
        gt_labels = gt_labels + gt_labels
        gt_bboxes_ignore = gt_bboxes_ignore + \
            gt_bboxes_ignore if gt_bboxes_ignore is not None else None
        losses = self.bbox_head.forward_train(new_x, img_metas, gt_bboxes,
                                              gt_labels, gt_bboxes_ignore)
        return losses

    def simple_test(self, img, img_metas, rescale=False):
        """Single-image test; the neck selects one branch in eval mode."""
        feat = self.extract_feat(img)
        results_list = self.bbox_head.simple_test_bboxes(feat,
                                                         img_metas,
                                                         rescale=rescale)
        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in results_list
        ]
        return bbox_results
detection/mmdet_custom/models/necks/__init__.py
0 → 100644
View file @
21d9f185
from
.cbnet_channel_mapper
import
CBChannelMapper
__all__
=
[
'CBChannelMapper'
]
detection/mmdet_custom/models/necks/cbnet_channel_mapper.py
0 → 100644
View file @
21d9f185
from
mmdet.models.builder
import
NECKS
from
mmdet.models.necks
import
ChannelMapper
@NECKS.register_module()
class CBChannelMapper(ChannelMapper):
    """ChannelMapper neck aware of the composite (CBNet) backbone.

    The composite backbone yields one feature list per branch. In
    training mode every branch is mapped and a list of mapped outputs is
    returned; in eval mode only the branch selected by ``cb_idx`` is
    mapped, so inference behaves like a plain ChannelMapper.

    Args:
        cb_idx (int): Index of the branch used at inference time.
        **kwargs: Forwarded to ``ChannelMapper.__init__``.
    """

    def __init__(self, cb_idx=1, **kwargs):
        super(CBChannelMapper, self).__init__(**kwargs)
        self.cb_idx = cb_idx

    def forward(self, inputs):
        # A plain (non-composite) feature list becomes a one-branch list.
        if not isinstance(inputs[0], (list, tuple)):
            inputs = [inputs]
        if not self.training:
            # Inference: map only the selected branch.
            return super().forward(inputs[self.cb_idx])
        # Training: map every branch independently and keep all results.
        # (Explicit two-arg super() because zero-arg super() is not
        # available inside a comprehension scope.)
        return [
            super(CBChannelMapper, self).forward(branch)
            for branch in inputs
        ]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment