Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
ecd555e9
Commit
ecd555e9
authored
Feb 25, 2025
by
zhe chen
Browse files
Release LVIS model
Release LVIS model
parent
21d9f185
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
499 additions
and
1 deletion
+499
-1
detection/configs/_base_/datasets/lvis_v1_instance.py
detection/configs/_base_/datasets/lvis_v1_instance.py
+1
-1
detection/configs/_base_/datasets/lvis_v1_instance_minival.py
...ction/configs/_base_/datasets/lvis_v1_instance_minival.py
+54
-0
detection/configs/lvis/README.md
detection/configs/lvis/README.md
+15
-0
detection/configs/lvis/dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py
...dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py
+199
-0
detection/configs/lvis/dino_4scale_cbinternimage_h_objects365_lvis_val_ss.py
...vis/dino_4scale_cbinternimage_h_objects365_lvis_val_ss.py
+199
-0
detection/mmcv_custom/__init__.py
detection/mmcv_custom/__init__.py
+31
-0
No files found.
detection/configs/_base_/datasets/lvis_v1_instance.py
View file @
ecd555e9
...
...
@@ -48,7 +48,7 @@ data = dict(
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/lvis_v1_
mini
val.json'
,
ann_file
=
data_root
+
'annotations/lvis_v1_val.json'
,
img_prefix
=
data_root
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
metric
=
[
'bbox'
])
detection/configs/_base_/datasets/lvis_v1_instance_minival.py
0 → 100644
View file @
ecd555e9
# dataset settings
# LVIS v1 instance-segmentation dataset config that evaluates on the
# "minival" annotation subset instead of the full val split.
_base_ = 'coco_instance.py'
dataset_type = 'LVISV1Dataset'
data_root = 'data/lvis_v1/'

# Standard ImageNet mean/std normalization, applied on RGB images.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    # Replace the inherited train dataset entirely (_delete_=True) with a
    # class-balanced wrapper: categories rarer than oversample_thr are
    # oversampled, the usual recipe for LVIS's long tail.
    train=dict(
        _delete_=True,
        type='ClassBalancedDataset',
        oversample_thr=1e-3,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root + 'annotations/lvis_v1_train.json',
            img_prefix=data_root,
            pipeline=train_pipeline)),
    # Both val and test use the minival annotation file in this config.
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/lvis_v1_minival.json',
        img_prefix=data_root,
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/lvis_v1_minival.json',
        img_prefix=data_root,
        pipeline=test_pipeline))

# Only box AP is evaluated here, even though masks are loaded for training.
evaluation = dict(metric=['bbox'])
detection/configs/lvis/README.md
0 → 100644
View file @
ecd555e9
# LVIS
## Introduction
LVIS is a dataset for long tail instance segmentation. It has annotations for over 1000 object categories in 164k images.
## Model Zoo
### DINO + CB-InternImage
Here, we report the box AP on the minival set and the val set, respectively.
| backbone | pretrain | minival (ss) | val (ss/ms) | #param | Config | Download |
| :--------------: | :--------: | :----------: | :---------: | :----: | :-------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------: |
| CB-InternImage-H | Objects365 | 65.8 | 62.3 / 63.2 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_lvis.pth) |
detection/configs/lvis/dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py
0 → 100644
View file @
ecd555e9
# DINO (4-scale) with a CB-InternImage-H backbone, Objects365-pretrained,
# fine-tuned and evaluated on the LVIS v1 minival split (single-scale test).
_base_ = [
    '../_base_/datasets/lvis_v1_instance_minival.py',
    '../_base_/default_runtime.py'
]
load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_80classes.pth'

model = dict(
    type='CBDINO',
    backbone=dict(
        type='CBInternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=True,  # activation checkpointing to fit the 2B-param model
        out_indices=[(0, 1, 2, 3), (1, 2, 3)],
        init_cfg=None,
    ),
    neck=[
        dict(
            type='CBChannelMapper',
            in_channels=[640, 1280, 2560],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='GN', num_groups=32),
            num_outs=4)],
    bbox_head=dict(
        type='CBDINOHead',
        num_query=900,
        num_classes=1203,  # LVIS v1 category count
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None,
                           num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        # NOTE(review): 'snip_cfg' semantics are defined by the project's
        # CBDINO implementation; values copied verbatim.
        snip_cfg=dict(type='v3', weight=0.1),
        # Federated loss: sample 50 negative classes per image, weighted by
        # class frequency^0.5 on the LVIS train split.
        fed_loss_cfg=dict(
            use_fed_loss=True,
            fed_loss_num_classes=50,
            dataset_names='lvis_v1_train',
            freq_weight_power=0.5,
        )
    ),
    test_cfg=dict(max_per_img=300))  # TODO: Originally 100

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1200)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=1,
    train=dict(dataset=dict(pipeline=train_pipeline)),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.94,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1e-3))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])  # no LR drops: constant LR after warmup
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(interval=500, max_keep_ckpts=10)
evaluation = dict(interval=500)
# resume_from = None
# custom_hooks = [
#     dict(
#         type='ExpMomentumEMAHook',
#         resume_from=resume_from,
#         momentum=0.0003,
#         priority=49),
#     dict(
#         type='ZeroHook',
#         interval=500,
#         priority=49),
# ]
detection/configs/lvis/dino_4scale_cbinternimage_h_objects365_lvis_val_ss.py
0 → 100644
View file @
ecd555e9
# DINO (4-scale) with a CB-InternImage-H backbone, Objects365-pretrained,
# fine-tuned on LVIS v1 and evaluated on the full val split (single-scale).
# Identical to the minival config except for the dataset base file.
_base_ = [
    '../_base_/datasets/lvis_v1_instance.py',
    '../_base_/default_runtime.py'
]
load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_80classes.pth'

model = dict(
    type='CBDINO',
    backbone=dict(
        type='CBInternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=True,  # activation checkpointing to fit the 2B-param model
        out_indices=[(0, 1, 2, 3), (1, 2, 3)],
        init_cfg=None,
    ),
    neck=[
        dict(
            type='CBChannelMapper',
            in_channels=[640, 1280, 2560],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='GN', num_groups=32),
            num_outs=4)],
    bbox_head=dict(
        type='CBDINOHead',
        num_query=900,
        num_classes=1203,  # LVIS v1 category count
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None,
                           num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(type='ReLU', inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        # NOTE(review): 'snip_cfg' semantics are defined by the project's
        # CBDINO implementation; values copied verbatim.
        snip_cfg=dict(type='v3', weight=0.1),
        # Federated loss: sample 50 negative classes per image, weighted by
        # class frequency^0.5 on the LVIS train split.
        fed_loss_cfg=dict(
            use_fed_loss=True,
            fed_loss_num_classes=50,
            dataset_names='lvis_v1_train',
            freq_weight_power=0.5,
        )
    ),
    test_cfg=dict(max_per_img=300))  # TODO: Originally 100

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1200)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=1,
    train=dict(dataset=dict(pipeline=train_pipeline)),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

# optimizer
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.94,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1e-3))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])  # no LR drops: constant LR after warmup
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(interval=500, max_keep_ckpts=10)
evaluation = dict(interval=500)
# resume_from = None
# custom_hooks = [
#     dict(
#         type='ExpMomentumEMAHook',
#         resume_from=resume_from,
#         momentum=0.0003,
#         priority=49),
#     dict(
#         type='ZeroHook',
#         interval=500,
#         priority=49),
# ]
detection/mmcv_custom/__init__.py
View file @
ecd555e9
...
...
@@ -9,3 +9,34 @@ from .custom_layer_decay_optimizer_constructor import \
CustomLayerDecayOptimizerConstructor
# Public API of this module.
__all__ = ['CustomLayerDecayOptimizerConstructor']

# ZeRO (ZeroRedundancyOptimizer) support is registered only under torch 1.11;
# presumably the wrappers below rely on that version's interface — TODO confirm.
if torch.__version__.startswith('1.11'):
    from mmcv.runner.hooks import HOOKS, Hook
    from mmcv.runner.optimizer.builder import OPTIMIZERS
    from mmdet.utils.util_distribution import ddp_factory  # noqa: F401,F403
    from torch.distributed.optim import ZeroRedundancyOptimizer

    class ZeroAdamW(ZeroRedundancyOptimizer):
        """AdamW sharded across ranks via ZeroRedundancyOptimizer.

        ZeroRedundancyOptimizer's constructor accepts a single flat parameter
        list, so the first param group seeds the optimizer and the remaining
        groups are appended afterwards via ``add_param_group``.
        """

        def __init__(self, params, optimizer_class=torch.optim.AdamW, **kwargs):
            # params: list of mmcv-style param-group dicts; group 0 initializes
            # the wrapped optimizer.
            super().__init__(params[0]['params'],
                             optimizer_class=optimizer_class,
                             parameters_as_bucket_view=True,
                             **kwargs)
            for i in range(1, len(params)):
                self.add_param_group(params[i])

    # Register manually (not via decorator) so the class definition above
    # stays inside the version-guarded branch.
    OPTIMIZERS.register_module()(ZeroAdamW)

    @HOOKS.register_module()
    class ZeroHook(Hook):
        """Periodically consolidate the sharded optimizer state onto rank 0.

        Needed so that checkpointing sees a complete state dict when using
        ZeroRedundancyOptimizer.
        """

        def __init__(self, interval):
            # interval: consolidate every N training iterations.
            self.interval = interval

        def after_epoch(self, runner):
            runner.optimizer.consolidate_state_dict(to=0)

        def after_train_iter(self, runner):
            if self.every_n_iters(runner, self.interval):
                runner.optimizer.consolidate_state_dict(to=0)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment