Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
GroundingDINO_mmcv
Commits
b12850fe
Commit
b12850fe
authored
May 29, 2024
by
dengjb
Browse files
update codes
parent
6515fb96
Pipeline
#1046
failed with stages
in 0 seconds
Changes
1000
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1978 additions
and
0 deletions
+1978
-0
configs/_base_/datasets/voc0712.py
configs/_base_/datasets/voc0712.py
+92
-0
configs/_base_/datasets/wider_face.py
configs/_base_/datasets/wider_face.py
+73
-0
configs/_base_/datasets/youtube_vis.py
configs/_base_/datasets/youtube_vis.py
+66
-0
configs/_base_/default_runtime.py
configs/_base_/default_runtime.py
+24
-0
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
+203
-0
configs/_base_/models/cascade-rcnn_r50_fpn.py
configs/_base_/models/cascade-rcnn_r50_fpn.py
+185
-0
configs/_base_/models/fast-rcnn_r50_fpn.py
configs/_base_/models/fast-rcnn_r50_fpn.py
+68
-0
configs/_base_/models/faster-rcnn_r50-caffe-c4.py
configs/_base_/models/faster-rcnn_r50-caffe-c4.py
+123
-0
configs/_base_/models/faster-rcnn_r50-caffe-dc5.py
configs/_base_/models/faster-rcnn_r50-caffe-dc5.py
+111
-0
configs/_base_/models/faster-rcnn_r50_fpn.py
configs/_base_/models/faster-rcnn_r50_fpn.py
+114
-0
configs/_base_/models/mask-rcnn_r50-caffe-c4.py
configs/_base_/models/mask-rcnn_r50-caffe-c4.py
+132
-0
configs/_base_/models/mask-rcnn_r50_fpn.py
configs/_base_/models/mask-rcnn_r50_fpn.py
+127
-0
configs/_base_/models/retinanet_r50_fpn.py
configs/_base_/models/retinanet_r50_fpn.py
+68
-0
configs/_base_/models/rpn_r50-caffe-c4.py
configs/_base_/models/rpn_r50-caffe-c4.py
+64
-0
configs/_base_/models/rpn_r50_fpn.py
configs/_base_/models/rpn_r50_fpn.py
+64
-0
configs/_base_/models/ssd300.py
configs/_base_/models/ssd300.py
+63
-0
configs/_base_/schedules/schedule_1x.py
configs/_base_/schedules/schedule_1x.py
+28
-0
configs/_base_/schedules/schedule_20e.py
configs/_base_/schedules/schedule_20e.py
+28
-0
configs/_base_/schedules/schedule_2x.py
configs/_base_/schedules/schedule_2x.py
+28
-0
configs/grounding_dino/README.md
configs/grounding_dino/README.md
+317
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
configs/_base_/datasets/voc0712.py
0 → 100644
View file @
b12850fe
# dataset settings
dataset_type
=
'VOCDataset'
data_root
=
'data/VOCdevkit/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically Infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection/segmentation/VOCdevkit/'
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/segmentation/',
# 'data/': 's3://openmmlab/datasets/segmentation/'
# }))
backend_args
=
None
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
scale
=
(
1000
,
600
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
prob
=
0.5
),
dict
(
type
=
'PackDetInputs'
)
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'Resize'
,
scale
=
(
1000
,
600
),
keep_ratio
=
True
),
# avoid bboxes being resized
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PackDetInputs'
,
meta_keys
=
(
'img_id'
,
'img_path'
,
'ori_shape'
,
'img_shape'
,
'scale_factor'
))
]
train_dataloader
=
dict
(
batch_size
=
2
,
num_workers
=
2
,
persistent_workers
=
True
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
batch_sampler
=
dict
(
type
=
'AspectRatioBatchSampler'
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
3
,
dataset
=
dict
(
type
=
'ConcatDataset'
,
# VOCDataset will add different `dataset_type` in dataset.metainfo,
# which will get error if using ConcatDataset. Adding
# `ignore_keys` can avoid this error.
ignore_keys
=
[
'dataset_type'
],
datasets
=
[
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
'VOC2007/ImageSets/Main/trainval.txt'
,
data_prefix
=
dict
(
sub_data_root
=
'VOC2007/'
),
filter_cfg
=
dict
(
filter_empty_gt
=
True
,
min_size
=
32
,
bbox_min_size
=
32
),
pipeline
=
train_pipeline
,
backend_args
=
backend_args
),
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
'VOC2012/ImageSets/Main/trainval.txt'
,
data_prefix
=
dict
(
sub_data_root
=
'VOC2012/'
),
filter_cfg
=
dict
(
filter_empty_gt
=
True
,
min_size
=
32
,
bbox_min_size
=
32
),
pipeline
=
train_pipeline
,
backend_args
=
backend_args
)
])))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
2
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
'VOC2007/ImageSets/Main/test.txt'
,
data_prefix
=
dict
(
sub_data_root
=
'VOC2007/'
),
test_mode
=
True
,
pipeline
=
test_pipeline
,
backend_args
=
backend_args
))
test_dataloader
=
val_dataloader
# Pascal VOC2007 uses `11points` as default evaluate mode, while PASCAL
# VOC2012 defaults to use 'area'.
val_evaluator
=
dict
(
type
=
'VOCMetric'
,
metric
=
'mAP'
,
eval_mode
=
'11points'
)
test_evaluator
=
val_evaluator
configs/_base_/datasets/wider_face.py
0 → 100644
View file @
b12850fe
# dataset settings
dataset_type
=
'WIDERFaceDataset'
data_root
=
'data/WIDERFace/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection/cityscapes/'
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
backend_args
=
None
img_scale
=
(
640
,
640
)
# VGA resolution
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
scale
=
img_scale
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
prob
=
0.5
),
dict
(
type
=
'PackDetInputs'
)
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'Resize'
,
scale
=
img_scale
,
keep_ratio
=
True
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'PackDetInputs'
,
meta_keys
=
(
'img_id'
,
'img_path'
,
'ori_shape'
,
'img_shape'
,
'scale_factor'
))
]
train_dataloader
=
dict
(
batch_size
=
2
,
num_workers
=
2
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
batch_sampler
=
dict
(
type
=
'AspectRatioBatchSampler'
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
'train.txt'
,
data_prefix
=
dict
(
img
=
'WIDER_train'
),
filter_cfg
=
dict
(
filter_empty_gt
=
True
,
bbox_min_size
=
17
,
min_size
=
32
),
pipeline
=
train_pipeline
))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
2
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
'val.txt'
,
data_prefix
=
dict
(
img
=
'WIDER_val'
),
test_mode
=
True
,
pipeline
=
test_pipeline
))
test_dataloader
=
val_dataloader
val_evaluator
=
dict
(
# TODO: support WiderFace-Evaluation for easy, medium, hard cases
type
=
'VOCMetric'
,
metric
=
'mAP'
,
eval_mode
=
'11points'
)
test_evaluator
=
val_evaluator
configs/_base_/datasets/youtube_vis.py
0 → 100644
View file @
b12850fe
dataset_type
=
'YouTubeVISDataset'
data_root
=
'data/youtube_vis_2019/'
dataset_version
=
data_root
[
-
5
:
-
1
]
# 2019 or 2021
backend_args
=
None
# dataset settings
train_pipeline
=
[
dict
(
type
=
'UniformRefFrameSample'
,
num_ref_imgs
=
1
,
frame_range
=
100
,
filter_key_img
=
True
),
dict
(
type
=
'TransformBroadcaster'
,
share_random_params
=
True
,
transforms
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'LoadTrackAnnotations'
,
with_mask
=
True
),
dict
(
type
=
'Resize'
,
scale
=
(
640
,
360
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
prob
=
0.5
),
]),
dict
(
type
=
'PackTrackInputs'
)
]
test_pipeline
=
[
dict
(
type
=
'TransformBroadcaster'
,
transforms
=
[
dict
(
type
=
'LoadImageFromFile'
,
backend_args
=
backend_args
),
dict
(
type
=
'Resize'
,
scale
=
(
640
,
360
),
keep_ratio
=
True
),
dict
(
type
=
'LoadTrackAnnotations'
,
with_mask
=
True
),
]),
dict
(
type
=
'PackTrackInputs'
)
]
# dataloader
train_dataloader
=
dict
(
batch_size
=
2
,
num_workers
=
2
,
persistent_workers
=
True
,
# sampler=dict(type='TrackImgSampler'), # image-based sampling
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
batch_sampler
=
dict
(
type
=
'TrackAspectRatioBatchSampler'
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
dataset_version
=
dataset_version
,
ann_file
=
'annotations/youtube_vis_2019_train.json'
,
data_prefix
=
dict
(
img_path
=
'train/JPEGImages'
),
pipeline
=
train_pipeline
))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
2
,
persistent_workers
=
True
,
drop_last
=
False
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
,
round_up
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
dataset_version
=
dataset_version
,
ann_file
=
'annotations/youtube_vis_2019_valid.json'
,
data_prefix
=
dict
(
img_path
=
'valid/JPEGImages'
),
test_mode
=
True
,
pipeline
=
test_pipeline
))
test_dataloader
=
val_dataloader
configs/_base_/default_runtime.py
0 → 100644
View file @
b12850fe
default_scope
=
'mmdet'
default_hooks
=
dict
(
timer
=
dict
(
type
=
'IterTimerHook'
),
logger
=
dict
(
type
=
'LoggerHook'
,
interval
=
50
),
param_scheduler
=
dict
(
type
=
'ParamSchedulerHook'
),
checkpoint
=
dict
(
type
=
'CheckpointHook'
,
interval
=
1
),
sampler_seed
=
dict
(
type
=
'DistSamplerSeedHook'
),
visualization
=
dict
(
type
=
'DetVisualizationHook'
))
env_cfg
=
dict
(
cudnn_benchmark
=
False
,
mp_cfg
=
dict
(
mp_start_method
=
'fork'
,
opencv_num_threads
=
0
),
dist_cfg
=
dict
(
backend
=
'nccl'
),
)
vis_backends
=
[
dict
(
type
=
'LocalVisBackend'
)]
visualizer
=
dict
(
type
=
'DetLocalVisualizer'
,
vis_backends
=
vis_backends
,
name
=
'visualizer'
)
log_processor
=
dict
(
type
=
'LogProcessor'
,
window_size
=
50
,
by_epoch
=
True
)
log_level
=
'INFO'
load_from
=
None
resume
=
False
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'CascadeRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_mask
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'CascadeRoIHead'
,
num_stages
=
3
,
stage_loss_weights
=
[
1
,
0.5
,
0.25
],
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
bbox_head
=
[
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
)),
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.05
,
0.05
,
0.1
,
0.1
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
)),
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.033
,
0.033
,
0.067
,
0.067
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
))
],
mask_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
14
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
mask_head
=
dict
(
type
=
'FCNMaskHead'
,
num_convs
=
4
,
in_channels
=
256
,
conv_out_channels
=
256
,
num_classes
=
80
,
loss_mask
=
dict
(
type
=
'CrossEntropyLoss'
,
use_mask
=
True
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
2000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
[
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
mask_size
=
28
,
pos_weight
=-
1
,
debug
=
False
),
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.6
,
min_pos_iou
=
0.6
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
mask_size
=
28
,
pos_weight
=-
1
,
debug
=
False
),
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.7
,
min_pos_iou
=
0.7
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
mask_size
=
28
,
pos_weight
=-
1
,
debug
=
False
)
]),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
,
mask_thr_binary
=
0.5
)))
configs/_base_/models/cascade-rcnn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'CascadeRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'CascadeRoIHead'
,
num_stages
=
3
,
stage_loss_weights
=
[
1
,
0.5
,
0.25
],
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
bbox_head
=
[
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
)),
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.05
,
0.05
,
0.1
,
0.1
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
)),
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.033
,
0.033
,
0.067
,
0.067
]),
reg_class_agnostic
=
True
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
))
]),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
2000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
[
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
),
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.6
,
min_pos_iou
=
0.6
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
),
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.7
,
min_pos_iou
=
0.7
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
)
]),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
)))
configs/_base_/models/fast-rcnn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'FastRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
bbox_head
=
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
)))
configs/_base_/models/faster-rcnn_r50-caffe-c4.py
0 → 100644
View file @
b12850fe
# model settings
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
bgr_to_rgb
=
False
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
3
,
strides
=
(
1
,
2
,
2
),
dilations
=
(
1
,
1
,
1
),
out_indices
=
(
2
,
),
frozen_stages
=
1
,
norm_cfg
=
norm_cfg
,
norm_eval
=
True
,
style
=
'caffe'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://detectron2/resnet50_caffe'
)),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
1024
,
feat_channels
=
1024
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
2
,
4
,
8
,
16
,
32
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
16
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
shared_head
=
dict
(
type
=
'ResLayer'
,
depth
=
50
,
stage
=
3
,
stride
=
2
,
dilation
=
1
,
style
=
'caffe'
,
norm_cfg
=
norm_cfg
,
norm_eval
=
True
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://detectron2/resnet50_caffe'
)),
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
14
,
sampling_ratio
=
0
),
out_channels
=
1024
,
featmap_strides
=
[
16
]),
bbox_head
=
dict
(
type
=
'BBoxHead'
,
with_avg_pool
=
True
,
roi_feat_size
=
7
,
in_channels
=
2048
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
12000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
6000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
)))
configs/_base_/models/faster-rcnn_r50-caffe-dc5.py
0 → 100644
View file @
b12850fe
# model settings
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
bgr_to_rgb
=
False
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
strides
=
(
1
,
2
,
2
,
1
),
dilations
=
(
1
,
1
,
1
,
2
),
out_indices
=
(
3
,
),
frozen_stages
=
1
,
norm_cfg
=
norm_cfg
,
norm_eval
=
True
,
style
=
'caffe'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://detectron2/resnet50_caffe'
)),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
2048
,
feat_channels
=
2048
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
2
,
4
,
8
,
16
,
32
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
16
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
2048
,
featmap_strides
=
[
16
]),
bbox_head
=
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
2048
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
12000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
nms_pre
=
6000
,
max_per_img
=
1000
,
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
)))
configs/_base_/models/faster-rcnn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'FasterRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
bbox_head
=
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
2000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
))
configs/_base_/models/mask-rcnn_r50-caffe-c4.py
0 → 100644
View file @
b12850fe
# model settings
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'MaskRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
bgr_to_rgb
=
False
,
pad_mask
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
3
,
strides
=
(
1
,
2
,
2
),
dilations
=
(
1
,
1
,
1
),
out_indices
=
(
2
,
),
frozen_stages
=
1
,
norm_cfg
=
norm_cfg
,
norm_eval
=
True
,
style
=
'caffe'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://detectron2/resnet50_caffe'
)),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
1024
,
feat_channels
=
1024
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
2
,
4
,
8
,
16
,
32
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
16
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
shared_head
=
dict
(
type
=
'ResLayer'
,
depth
=
50
,
stage
=
3
,
stride
=
2
,
dilation
=
1
,
style
=
'caffe'
,
norm_cfg
=
norm_cfg
,
norm_eval
=
True
),
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
14
,
sampling_ratio
=
0
),
out_channels
=
1024
,
featmap_strides
=
[
16
]),
bbox_head
=
dict
(
type
=
'BBoxHead'
,
with_avg_pool
=
True
,
roi_feat_size
=
7
,
in_channels
=
2048
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
mask_roi_extractor
=
None
,
mask_head
=
dict
(
type
=
'FCNMaskHead'
,
num_convs
=
0
,
in_channels
=
2048
,
conv_out_channels
=
256
,
num_classes
=
80
,
loss_mask
=
dict
(
type
=
'CrossEntropyLoss'
,
use_mask
=
True
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
12000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
mask_size
=
14
,
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
6000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
max_per_img
=
1000
,
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
,
mask_thr_binary
=
0.5
)))
configs/_base_/models/mask-rcnn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'MaskRCNN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_mask
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
roi_head
=
dict
(
type
=
'StandardRoIHead'
,
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
7
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
bbox_head
=
dict
(
type
=
'Shared2FCBBoxHead'
,
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
mask_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAlign'
,
output_size
=
14
,
sampling_ratio
=
0
),
out_channels
=
256
,
featmap_strides
=
[
4
,
8
,
16
,
32
]),
mask_head
=
dict
(
type
=
'FCNMaskHead'
,
num_convs
=
4
,
in_channels
=
256
,
conv_out_channels
=
256
,
num_classes
=
80
,
loss_mask
=
dict
(
type
=
'CrossEntropyLoss'
,
use_mask
=
True
,
loss_weight
=
1.0
))),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
2000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.5
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
512
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
True
),
mask_size
=
28
,
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
),
rcnn
=
dict
(
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
,
mask_thr_binary
=
0.5
)))
configs/_base_/models/retinanet_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'RetinaNet'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
'on_input'
,
num_outs
=
5
),
bbox_head
=
dict
(
type
=
'RetinaHead'
,
num_classes
=
80
,
in_channels
=
256
,
stacked_convs
=
4
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
8
,
16
,
32
,
64
,
128
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
# model training and testing settings
train_cfg
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'PseudoSampler'
),
# Focal loss should use PseudoSampler
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.5
),
max_per_img
=
100
))
configs/_base_/models/rpn_r50-caffe-c4.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'RPN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
bgr_to_rgb
=
False
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
3
,
strides
=
(
1
,
2
,
2
),
dilations
=
(
1
,
1
,
1
),
out_indices
=
(
2
,
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
),
norm_eval
=
True
,
style
=
'caffe'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://detectron2/resnet50_caffe'
)),
neck
=
None
,
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
1024
,
feat_channels
=
1024
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
2
,
4
,
8
,
16
,
32
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
16
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
12000
,
max_per_img
=
2000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
)))
configs/_base_/models/rpn_r50_fpn.py
0 → 100644
View file @
b12850fe
# model settings
model
=
dict
(
type
=
'RPN'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
32
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
True
,
style
=
'pytorch'
,
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'torchvision://resnet50'
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
num_outs
=
5
),
rpn_head
=
dict
(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'RandomSampler'
,
num
=
256
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
)),
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
2000
,
max_per_img
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.7
),
min_bbox_size
=
0
)))
configs/_base_/models/ssd300.py
0 → 100644
View file @
b12850fe
# model settings
input_size
=
300
model
=
dict
(
type
=
'SingleStageDetector'
,
data_preprocessor
=
dict
(
type
=
'DetDataPreprocessor'
,
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
1
,
1
,
1
],
bgr_to_rgb
=
True
,
pad_size_divisor
=
1
),
backbone
=
dict
(
type
=
'SSDVGG'
,
depth
=
16
,
with_last_pool
=
False
,
ceil_mode
=
True
,
out_indices
=
(
3
,
4
),
out_feature_indices
=
(
22
,
34
),
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
'open-mmlab://vgg16_caffe'
)),
neck
=
dict
(
type
=
'SSDNeck'
,
in_channels
=
(
512
,
1024
),
out_channels
=
(
512
,
1024
,
512
,
256
,
256
,
256
),
level_strides
=
(
2
,
2
,
1
,
1
),
level_paddings
=
(
1
,
1
,
0
,
0
),
l2_norm_scale
=
20
),
bbox_head
=
dict
(
type
=
'SSDHead'
,
in_channels
=
(
512
,
1024
,
512
,
256
,
256
,
256
),
num_classes
=
80
,
anchor_generator
=
dict
(
type
=
'SSDAnchorGenerator'
,
scale_major
=
False
,
input_size
=
input_size
,
basesize_ratio_range
=
(
0.15
,
0.9
),
strides
=
[
8
,
16
,
32
,
64
,
100
,
300
],
ratios
=
[[
2
],
[
2
,
3
],
[
2
,
3
],
[
2
,
3
],
[
2
],
[
2
]]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
])),
# model training and testing settings
train_cfg
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
min_pos_iou
=
0.
,
ignore_iof_thr
=-
1
,
gt_max_assign_all
=
False
),
sampler
=
dict
(
type
=
'PseudoSampler'
),
smoothl1_beta
=
1.
,
allowed_border
=-
1
,
pos_weight
=-
1
,
neg_pos_ratio
=
3
,
debug
=
False
),
test_cfg
=
dict
(
nms_pre
=
1000
,
nms
=
dict
(
type
=
'nms'
,
iou_threshold
=
0.45
),
min_bbox_size
=
0
,
score_thr
=
0.02
,
max_per_img
=
200
))
cudnn_benchmark
=
True
configs/_base_/schedules/schedule_1x.py
0 → 100644
View file @
b12850fe
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
12
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# learning rate
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
start_factor
=
0.001
,
by_epoch
=
False
,
begin
=
0
,
end
=
500
),
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
12
,
by_epoch
=
True
,
milestones
=
[
8
,
11
],
gamma
=
0.1
)
]
# optimizer
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.02
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr
=
dict
(
enable
=
False
,
base_batch_size
=
16
)
configs/_base_/schedules/schedule_20e.py
0 → 100644
View file @
b12850fe
# training schedule for 20e
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
20
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# learning rate
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
start_factor
=
0.001
,
by_epoch
=
False
,
begin
=
0
,
end
=
500
),
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
20
,
by_epoch
=
True
,
milestones
=
[
16
,
19
],
gamma
=
0.1
)
]
# optimizer
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.02
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr
=
dict
(
enable
=
False
,
base_batch_size
=
16
)
configs/_base_/schedules/schedule_2x.py
0 → 100644
View file @
b12850fe
# training schedule for 2x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
24
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# learning rate
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
start_factor
=
0.001
,
by_epoch
=
False
,
begin
=
0
,
end
=
500
),
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
24
,
by_epoch
=
True
,
milestones
=
[
16
,
22
],
gamma
=
0.1
)
]
# optimizer
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.02
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr
=
dict
(
enable
=
False
,
base_batch_size
=
16
)
configs/grounding_dino/README.md
0 → 100644
View file @
b12850fe
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
5
6
7
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment