Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
00af501a
Commit
00af501a
authored
Mar 04, 2025
by
zhe chen
Browse files
Release detection models
parent
0dec0215
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
502 additions
and
4 deletions
+502
-4
detection/README.md
detection/README.md
+6
-2
detection/configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
...gs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
+6
-1
detection/configs/coco/dino_4scale_internimage_g_objects365_coco_ss.py
...figs/coco/dino_4scale_internimage_g_objects365_coco_ss.py
+192
-0
detection/configs/coco/dino_4scale_internimage_h_objects365_coco_ss.py
...figs/coco/dino_4scale_internimage_h_objects365_coco_ss.py
+192
-0
detection/mmcv_custom/__init__.py
detection/mmcv_custom/__init__.py
+4
-1
detection/mmcv_custom/efficient_ffn.py
detection/mmcv_custom/efficient_ffn.py
+102
-0
No files found.
detection/README.md
View file @
00af501a
...
@@ -112,8 +112,12 @@ Prepare datasets according to the guidelines in [MMDetection v2.28.1](https://gi
...
@@ -112,8 +112,12 @@ Prepare datasets according to the guidelines in [MMDetection v2.28.1](https://gi
| :--------: | :--------------: | :--: | :-----: | :----: | :----------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| :--------: | :--------------: | :--: | :-----: | :----: | :----------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| DINO | InternImage-T | 1x | 53.9 | 49M |
[
config
](
./configs/coco/dino_4scale_internimage_t_1x_coco_layer_wise_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.json
)
|
| DINO | InternImage-T | 1x | 53.9 | 49M |
[
config
](
./configs/coco/dino_4scale_internimage_t_1x_coco_layer_wise_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_t_1x_coco.json
)
|
| DINO | InternImage-L | 1x | 57.6 | 241M |
[
config
](
./configs/coco/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.log.json
)
|
| DINO | InternImage-L | 1x | 57.6 | 241M |
[
config
](
./configs/coco/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.pth
)
\|
[
log
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.log.json
)
|
| DINO | CB-InternImage-H | 1x | 64.5 | 2.18B |
[
config
](
./configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_coco.pth
)
|
| DINO | InternImage-H | 1x | 63.4 | 1.1B |
[
config
](
./configs/coco/dino_4scale_internimage_h_objects365_coco_ss.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_h_objects365_coco.pth
)
|
| DINO (TTA) | CB-InternImage-H | 1x | 65.0 | 2.18B | - |
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_coco.pth
)
|
| DINO | CB-InternImage-H | 1x | 64.5 | 2.2B |
[
config
](
./configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_coco.pth
)
|
| DINO (TTA) | CB-InternImage-H | 1x | 65.0 | 2.2B | TODO |
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_coco.pth
)
|
| DINO | InternImage-G | 1x | 64.2 | 3.1B |
[
config
](
./configs/coco/dino_4scale_internimage_g_objects365_coco_ss.py
)
|
[
ckpt
](
https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_internimage_g_objects365_coco.pth
)
|
| DINO (TTA) | CB-InternImage-G | 1x | 65.1 | 6B | TODO | TODO |
| DINO (TTA) | CB-InternImage-G | 1x | 65.3 | 6B | TODO | TODO |
</div>
</div>
...
...
detection/configs/coco/dino_4scale_cbinternimage_h_objects365_coco_ss.py
View file @
00af501a
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_
=
[
_base_
=
[
'../_base_/datasets/coco_detection.py'
,
'../_base_/datasets/coco_detection.py'
,
'../_base_/default_runtime.py'
'../_base_/default_runtime.py'
...
@@ -122,7 +127,7 @@ model = dict(
...
@@ -122,7 +127,7 @@ model = dict(
snip_cfg
=
dict
(
snip_cfg
=
dict
(
type
=
'v3'
,
type
=
'v3'
,
weight
=
0.1
)),
weight
=
0.1
)),
test_cfg
=
dict
(
max_per_img
=
300
))
# TODO: Originally 100
test_cfg
=
dict
(
max_per_img
=
300
))
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
...
...
detection/configs/coco/dino_4scale_internimage_g_objects365_coco_ss.py
0 → 100644
View file @
00af501a
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# DINO detector with an InternImage-G backbone, fine-tuned on COCO after
# Objects365 pre-training (single-scale testing).
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
model = dict(
    type='DINO',
    backbone=dict(
        type='InternImage',
        core_op='DCNv3',
        channels=512,
        depths=[2, 2, 48, 4],
        groups=[16, 32, 64, 128],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=True,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=False,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29, 35, 41, 47],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=True,
        out_indices=(1, 2, 3),
        init_cfg=None  # dict(type='Pretrained', checkpoint=pretrained)
    ),
    neck=dict(
        type='ChannelMapper',
        in_channels=[1024, 2048, 4096],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    bbox_head=dict(
        type='DINOHead',
        num_query=900,
        num_classes=80,
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='EfficientFFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='EfficientFFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        snip_cfg=dict(type='v3', weight=0.1)),
    test_cfg=dict(max_per_img=300))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1800)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train=dict(filter_empty_gt=True, pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=56,
        layer_decay_rate=0.94,
        depths=[2, 2, 48, 4],
        offset_lr_scale=1e-3))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(interval=200, max_keep_ckpts=3)
evaluation = dict(interval=200, save_best='auto')
# resume_from = None
# custom_hooks = [
#     dict(
#         type='ExpMomentumEMAHook',
#         resume_from=resume_from,
#         momentum=0.0003,
#         priority=49)
# ]
detection/configs/coco/dino_4scale_internimage_h_objects365_coco_ss.py
0 → 100644
View file @
00af501a
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# DINO detector with an InternImage-H backbone, fine-tuned on COCO after
# Objects365 pre-training (single-scale testing).
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/default_runtime.py'
]
model = dict(
    type='DINO',
    backbone=dict(
        type='InternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=True,
        out_indices=(1, 2, 3),
        init_cfg=None  # dict(type='Pretrained', checkpoint=pretrained)
    ),
    neck=dict(
        type='ChannelMapper',
        in_channels=[640, 1280, 2560],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    bbox_head=dict(
        type='DINOHead',
        num_query=900,
        num_classes=80,
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        snip_cfg=dict(type='v3', weight=0.1)),
    test_cfg=dict(max_per_img=300))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1800)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train=dict(filter_empty_gt=True, pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.94,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1e-3))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(interval=200, max_keep_ckpts=3)
evaluation = dict(interval=200, save_best='auto')
# resume_from = None
# custom_hooks = [
#     dict(
#         type='ExpMomentumEMAHook',
#         resume_from=resume_from,
#         momentum=0.0003,
#         priority=49)
# ]
detection/mmcv_custom/__init__.py
View file @
00af501a
...
@@ -4,11 +4,14 @@
...
@@ -4,11 +4,14 @@
# Licensed under The MIT License [see LICENSE for details]
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# --------------------------------------------------------
import
torch
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from
.custom_layer_decay_optimizer_constructor
import
\
from
.custom_layer_decay_optimizer_constructor
import
\
CustomLayerDecayOptimizerConstructor
CustomLayerDecayOptimizerConstructor
from
.efficient_ffn
import
EfficientFFN
__all__
=
[
'CustomLayerDecayOptimizerConstructor'
]
__all__
=
[
'CustomLayerDecayOptimizerConstructor'
,
'EfficientFFN'
]
if
torch
.
__version__
.
startswith
(
'1.11'
):
if
torch
.
__version__
.
startswith
(
'1.11'
):
...
...
detection/mmcv_custom/efficient_ffn.py
0 → 100644
View file @
00af501a
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings
from typing import Sequence

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
from mmcv.cnn import (build_activation_layer, build_conv_layer,
                      build_norm_layer, xavier_init)
# build_dropout was missing but is used in EfficientFFN.__init__ when a
# dropout_layer cfg is supplied.
from mmcv.cnn.bricks.drop import build_dropout
from mmcv.cnn.bricks.registry import (FEEDFORWARD_NETWORK, TRANSFORMER_LAYER,
                                      TRANSFORMER_LAYER_SEQUENCE)
from mmcv.cnn.bricks.transformer import (BaseTransformerLayer,
                                         TransformerLayerSequence,
                                         build_transformer_layer_sequence)
from mmcv.runner import force_fp32
from mmcv.runner.base_module import BaseModule
from mmcv.utils import deprecated_api_warning, to_2tuple
from mmdet.models.utils.builder import TRANSFORMER
from torch.nn.init import normal_
@FEEDFORWARD_NETWORK.register_module()
class EfficientFFN(BaseModule):
    """Split feed-forward network for transformer layers.

    A drop-in replacement for mmcv's ``FFN`` that partitions the hidden
    layer into ``split`` independent fc1->act->fc2 branches and sums their
    outputs, so each branch can be gradient-checkpointed separately to
    reduce activation memory. The fc2 branches carry no bias; a single
    shared bias (``fc2_bias``) is added once after the branch sum, which
    keeps the result mathematically equivalent to one wide FFN.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='EfficientFFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 split=4,
                 use_checkpoint=False,
                 **kwargs):
        """
        Args:
            embed_dims (int): Input/output channel dimension.
            feedforward_channels (int): Total hidden dimension; divided
                evenly across the ``split`` branches.
            num_fcs (int): Kept for FFN API compatibility; must be >= 2.
            act_cfg (dict): Activation config passed to
                ``build_activation_layer``.
            ffn_drop (float): Dropout rate applied after the activation and
                after each branch's fc2.
            dropout_layer (dict | None): Optional dropout/droppath cfg for
                the residual branch; identity when None.
            add_identity (bool): Whether to add the identity shortcut.
            init_cfg (dict | None): ``BaseModule`` init config.
            split (int): Number of parallel fc1/fc2 branches.
            use_checkpoint (bool): Run each branch under
                ``torch.utils.checkpoint`` during training.
        """
        super(EfficientFFN, self).__init__(init_cfg)
        assert num_fcs >= 2, f'num_fcs should be no less than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)
        self.drop = nn.Dropout(ffn_drop)
        in_channels = embed_dims
        self.use_checkpoint = use_checkpoint
        self.split = split
        # Per-branch first projection: embed_dims -> hidden/split.
        for i in range(split):
            fc1 = nn.Linear(
                in_channels, feedforward_channels // self.split, bias=True)
            setattr(self, f'fc1_{i}', fc1)
        # Per-branch second projection: hidden/split -> embed_dims, bias-free
        # so the branch outputs can simply be summed.
        for i in range(split):
            fc2 = nn.Linear(
                feedforward_channels // self.split, embed_dims, bias=False)
            setattr(self, f'fc2_{i}', fc2)
        # Shared output bias added once after summing the branches.
        self.fc2_bias = nn.Parameter(
            torch.zeros((embed_dims)), requires_grad=True)
        # fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.fc2_0.weight)
        # bound = 1 / math.sqrt(fan_in)
        # torch.nn.init.uniform_(self.fc2_bias, -bound, bound)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else torch.nn.Identity()
        self.add_identity = add_identity

    # NOTE: cls_name was 'FFN' (copied from mmcv.FFN); fixed so the
    # deprecation warning names the actual class.
    @deprecated_api_warning({'residual': 'identity'}, cls_name='EfficientFFN')
    def forward(self, x, identity=None):
        """Forward: sum of ``split`` branch outputs (+ shared bias), with an
        optional identity shortcut.

        Args:
            x (Tensor): Input of shape (..., embed_dims).
            identity (Tensor | None): Shortcut tensor; defaults to ``x``.
        """

        def _inner_forward(x, i):
            # One branch: fc1 -> act -> drop -> fc2 -> drop.
            fc1 = getattr(self, f'fc1_{i}')
            x = fc1(x)
            x = self.activate(x)
            x = self.drop(x)
            fc2 = getattr(self, f'fc2_{i}')
            x = fc2(x)
            x = self.drop(x)
            return x

        out = 0
        for i in range(self.split):
            # checkpoint() needs requires_grad inputs to have any effect;
            # skip it for pure inference.
            if self.use_checkpoint and x.requires_grad:
                out = out + checkpoint.checkpoint(_inner_forward, x, i)
            else:
                out = out + _inner_forward(x, i)
        out = out + self.fc2_bias
        if not self.add_identity:
            return self.dropout_layer(out)
        if identity is None:
            identity = x
        return identity + self.dropout_layer(out)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment