Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
63f0666a
Commit
63f0666a
authored
Feb 26, 2025
by
zhe chen
Browse files
Release OpenImages model
parent
ecd555e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
198 additions
and
3 deletions
+198
-3
detection/configs/_base_/datasets/openimages_detection.py
detection/configs/_base_/datasets/openimages_detection.py
+3
-3
detection/configs/openimages/README.md
detection/configs/openimages/README.md
+13
-0
detection/configs/openimages/dino_4scale_cbinternimage_h_objects365_openimages_ss.py
...s/dino_4scale_cbinternimage_h_objects365_openimages_ss.py
+182
-0
No files found.
detection/configs/_base_/datasets/openimages_detection.py
View file @
63f0666a
...
...
@@ -35,7 +35,7 @@ data = dict(
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/oidv6-train-annotations-bbox.csv'
,
img_prefix
=
data_root
+
'
OpenImages/
train/'
,
img_prefix
=
data_root
+
'train/'
,
label_file
=
data_root
+
'annotations/class-descriptions-boxable.csv'
,
hierarchy_file
=
data_root
+
'annotations/bbox_labels_600_hierarchy.json'
,
...
...
@@ -43,7 +43,7 @@ data = dict(
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/validation-annotations-bbox.csv'
,
img_prefix
=
data_root
+
'
OpenImages/
validation/'
,
img_prefix
=
data_root
+
'validation/'
,
label_file
=
data_root
+
'annotations/class-descriptions-boxable.csv'
,
hierarchy_file
=
data_root
+
'annotations/bbox_labels_600_hierarchy.json'
,
...
...
@@ -54,7 +54,7 @@ data = dict(
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/validation-annotations-bbox.csv'
,
img_prefix
=
data_root
+
'
OpenImages/
validation/'
,
img_prefix
=
data_root
+
'validation/'
,
label_file
=
data_root
+
'annotations/class-descriptions-boxable.csv'
,
hierarchy_file
=
data_root
+
'annotations/bbox_labels_600_hierarchy.json'
,
...
...
detection/configs/openimages/README.md
0 → 100644
View file @
63f0666a
# OpenImages
## Introduction
OpenImages V6 is a large-scale dataset consisting of 9 million training images, 41,620 validation samples, and 125,456 test samples. It is a partially annotated dataset with 9,600 trainable classes.
## Model Zoo
### DINO + CB-InternImage
| backbone | pretrain | mAP (ss) | #param | Config | Download |
| :--------------: | :--------: | :------: | :----: | :-----------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: |
| CB-InternImage-H | Objects365 | 74.1 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_openimages_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_openimages.pth) |
detection/configs/openimages/dino_4scale_cbinternimage_h_objects365_openimages_ss.py
0 → 100644
View file @
63f0666a
# Inherit the OpenImages dataset pipeline and the default runtime settings.
_base_ = [
    '../_base_/datasets/openimages_detection.py',
    '../_base_/default_runtime.py'
]
# Warm-start from the Objects365-pretrained CB-InternImage-H DINO checkpoint.
load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_80classes.pth'
# DINO-style detector ('CBDINO') with a composite-backbone (CB) InternImage-H,
# configured here with num_classes=601 for OpenImages detection.
# NOTE(review): 601 rather than the usual 600 OpenImages boxable classes —
# presumably one extra (background/placeholder) class; confirm against CBDINOHead.
model = dict(
    type='CBDINO',
    backbone=dict(
        type='CBInternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        # Activation checkpointing to trade compute for memory (2.18B params).
        with_cp=True,
        # Two tuples: feature stages fed to each of the two composite branches.
        out_indices=[(0, 1, 2, 3), (1, 2, 3)],
        init_cfg=None,
    ),
    neck=[
        dict(
            type='CBChannelMapper',
            in_channels=[640, 1280, 2560],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='GN', num_groups=32),
            num_outs=4)],
    bbox_head=dict(
        type='CBDINOHead',
        num_query=900,
        num_classes=601,
        in_channels=2048,  # TODO
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        # Contrastive denoising (CDN) query generation, as in DINO.
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=1.0),  # 0.5, 0.4 for DN-DETR
            group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=1000)),
        transformer=dict(
            type='DinoTransformer',
            two_stage_num_proposals=900,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),  # 0.1 for DeformDETR
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(
                            type='ReLU',
                            inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),  # 0.1 for DeformDETR
                        dict(
                            type='MultiScaleDeformableAttention',
                            num_levels=4,
                            embed_dims=256,
                            dropout=0.0),  # 0.1 for DeformDETR
                    ],
                    feedforward_channels=2048,  # 1024 for DeformDETR
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=2048,
                        num_fcs=2,
                        ffn_drop=0.,
                        use_checkpoint=True,
                        act_cfg=dict(
                            type='ReLU',
                            inplace=True),),
                    ffn_dropout=0.0,  # 0.1 for DeformDETR
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        # One-to-one Hungarian matching between predictions and GT boxes.
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
        snip_cfg=dict(type='v3', weight=0.1)),
    test_cfg=dict(max_per_img=300))  # TODO: Originally 100
# Per-channel normalization statistics (standard ImageNet mean/std, RGB order).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# Training pipeline. NOTE: the img_scale and the Pad size_divisor differ from
# the default mmdet settings.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # denorm_bbox: OpenImages annotations store normalized box coordinates.
    dict(type='LoadAnnotations', with_bbox=True, denorm_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Resize',
        img_scale=[(2000, 600), (2000, 1200)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
# Single-scale test pipeline (no flip augmentation at inference).
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2000, 1000),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
# Dataloader settings; dataset paths come from the inherited _base_ config.
# class_aware_sampler mitigates the long-tailed class distribution by sampling
# images per class.
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train_dataloader=dict(class_aware_sampler=dict(num_sample_class=1)),
    train=dict(filter_empty_gt=True, pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# Optimizer: AdamW with layer-wise learning-rate decay over the backbone
# (depths must match the backbone's [6, 6, 32, 6]); DCNv3 offset layers get a
# reduced learning rate via offset_lr_scale.
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.0001,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.94,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1e-3))
# Gradient clipping keeps the very large model stable early in training.
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy: linear warmup, then constant LR (empty step list means no
# LR decay steps over the 100k iterations).
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[])
runner = dict(type='IterBasedRunner', max_iters=100000)
# Checkpoint every 2k iters, keeping only the 3 most recent to bound disk use.
checkpoint_config = dict(interval=2000, max_keep_ckpts=3)
evaluation = dict(interval=2000, metric='mAP', save_best='auto')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment