Commit b12850fe authored by dengjb's avatar dengjb
Browse files

update codes

parent 6515fb96
Pipeline #1046 failed with stages
in 0 seconds
# Inherits the Swin-T zero-shot concat-DOD config and overrides only the
# backbone hyper-parameters and the neck input channels.
_base_ = 'grounding_dino_swin-t_pretrain_zeroshot_concat_dod.py'

model = dict(
    type='GroundingDINO',
    backbone=dict(
        embed_dims=128,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        window_size=12,
        pretrain_img_size=384,
        patch_norm=True,
        drop_path_rate=0.3,
    ),
    neck={'in_channels': [256, 512, 1024]},
)
# Inherits the Swin-B concat-DOD config; the single override is the
# test-time ``chunked_size``.
_base_ = 'grounding_dino_swin-b_pretrain_zeroshot_concat_dod.py'

model = {'test_cfg': {'chunked_size': 1}}
# Evaluation on three D3 annotation files (full / pres / abs). The three
# datasets are concatenated into one dataloader and each gets its own
# metric, reported under the matching prefix.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'

data_root = 'data/d3/'

# Test-time pipeline; both decoding and resizing select the pillow backend.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        imdecode_backend='pillow',
        backend_args=None),
    dict(
        type='FixScaleResize',
        backend='pillow',
        keep_ratio=True,
        scale=(800, 1333)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'text', 'custom_entities', 'sent_ids')),
]

# ------------------------- full ------------------------- #
val_dataset_full = dict(
    type='DODDataset',
    data_root=data_root,
    data_prefix=dict(img='d3_images/', anno='d3_pkl'),
    ann_file='d3_json/d3_full_annotations.json',
    pipeline=test_pipeline,
    return_classes=True,
    test_mode=True,
    backend_args=None)
val_evaluator_full = dict(
    type='DODCocoMetric',
    ann_file=f'{data_root}d3_json/d3_full_annotations.json')

# ------------------------- pres ------------------------- #
val_dataset_pres = dict(
    type='DODDataset',
    data_root=data_root,
    data_prefix=dict(img='d3_images/', anno='d3_pkl'),
    ann_file='d3_json/d3_pres_annotations.json',
    pipeline=test_pipeline,
    return_classes=True,
    test_mode=True,
    backend_args=None)
val_evaluator_pres = dict(
    type='DODCocoMetric',
    ann_file=f'{data_root}d3_json/d3_pres_annotations.json')

# ------------------------- abs -------------------------- #
val_dataset_abs = dict(
    type='DODDataset',
    data_root=data_root,
    data_prefix=dict(img='d3_images/', anno='d3_pkl'),
    ann_file='d3_json/d3_abs_annotations.json',
    pipeline=test_pipeline,
    return_classes=True,
    test_mode=True,
    backend_args=None)
val_evaluator_abs = dict(
    type='DODCocoMetric',
    ann_file=f'{data_root}d3_json/d3_abs_annotations.json')

# ----------------------- assembly ----------------------- #
# The three lists below are index-aligned: dataset i is scored by metric i
# and reported under prefix i.
datasets = [val_dataset_full, val_dataset_pres, val_dataset_abs]
dataset_prefixes = ['FULL', 'PRES', 'ABS']
metrics = [val_evaluator_full, val_evaluator_pres, val_evaluator_abs]

val_dataloader = dict(
    dataset=dict(type='ConcatDataset', datasets=datasets, _delete_=True))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='MultiDatasetsEvaluator',
    metrics=metrics,
    dataset_prefixes=dataset_prefixes,
    _delete_=True)
test_evaluator = val_evaluator
# Inherits the Swin-T concat-DOD config; the single override is the
# test-time ``chunked_size``.
_base_ = 'grounding_dino_swin-t_pretrain_zeroshot_concat_dod.py'

model = {'test_cfg': {'chunked_size': 1}}
# Evaluation on the Flickr30k val and test annotation files, concatenated
# into a single dataloader with one metric per file.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'

dataset_type = 'Flickr30kDataset'
data_root = 'data/flickr30k_entities/'

# Test-time pipeline; both decoding and resizing select the pillow backend.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        imdecode_backend='pillow',
        backend_args=None),
    dict(
        type='FixScaleResize',
        backend='pillow',
        keep_ratio=True,
        scale=(800, 1333)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'text', 'custom_entities',
                   'tokens_positive', 'phrase_ids', 'phrases')),
]

dataset_Flickr30k_val = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(img='flickr30k_images/'),
    ann_file='final_flickr_separateGT_val.json',
    pipeline=test_pipeline,
)
dataset_Flickr30k_test = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(img='flickr30k_images/'),
    ann_file='final_flickr_separateGT_test.json',
    pipeline=test_pipeline,
)

val_evaluator_Flickr30k = {'type': 'Flickr30kMetric'}
test_evaluator_Flickr30k = {'type': 'Flickr30kMetric'}

# ----------Config---------- #
# Index-aligned lists: dataset i is scored by metric i under prefix i.
dataset_prefixes = ['Flickr30kVal', 'Flickr30kTest']
datasets = [dataset_Flickr30k_val, dataset_Flickr30k_test]
metrics = [val_evaluator_Flickr30k, test_evaluator_Flickr30k]

val_dataloader = dict(
    dataset=dict(type='ConcatDataset', datasets=datasets, _delete_=True))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='MultiDatasetsEvaluator',
    metrics=metrics,
    dataset_prefixes=dataset_prefixes,
    _delete_=True)
test_evaluator = val_evaluator
# --- Paths, batch sizes and schedule length ---------------------------------
work_dir = './work_dirs/grounding_dino_r50_scratch_8xb2_1x_coco'
data_root = 'datasets/coco_mini/'
train_anno = 'annotations/instances_train2017.json'
val_anno = 'annotations/instances_val2017.json'
train_image_dir = 'images/train2017/'
val_image_dir = 'images/val2017/'
train_batch_size = 2
test_batch_size = 1
max_epochs = 12

# --- Runtime settings -------------------------------------------------------
auto_scale_lr = dict(enable=False, base_batch_size=16)
backend_args = None
dataset_type = 'CocoDataset'
default_scope = 'mmdet'
lang_model_name = 'bert-base-uncased'
launcher = 'pytorch'
load_from = None
log_level = 'INFO'
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))

# --- Hooks ------------------------------------------------------------------
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
# Detector definition: ResNet-50 backbone, BERT language model and a
# 6-layer encoder / 6-layer decoder, as listed below.
model = dict(
    type='GroundingDINO',
    num_queries=900,
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_mask=False),
    language_model=dict(
        type='BertModel',
        name='bert-base-uncased',
        pad_to_max=False,
        use_sub_sentence_represent=True,
        special_tokens_list=['[CLS]', '[SEP]', '.', '?'],
        add_pooling_layer=False),
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='ChannelMapper',
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        bias=True,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    encoder=dict(
        num_layers=6,
        num_cp=6,
        # visual layer config
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
        # text layer config
        text_layer_cfg=dict(
            self_attn_cfg=dict(num_heads=4, embed_dims=256, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=1024, ffn_drop=0.0)),
        # fusion layer config
        fusion_layer_cfg=dict(
            v_dim=256,
            l_dim=256,
            embed_dim=1024,
            num_heads=4,
            init_values=1e-4)),
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            cross_attn_text_cfg=dict(
                embed_dims=256, num_heads=8, dropout=0.0),
            cross_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
        post_norm_cfg=None),
    positional_encoding=dict(
        num_feats=128, normalize=True, offset=0.0, temperature=20),
    bbox_head=dict(
        type='GroundingDINOHead',
        num_classes=80,
        sync_cls_avg_factor=True,
        contrastive_cfg=dict(max_text_len=256, log_scale='auto', bias=True),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    dn_cfg=dict(
        label_noise_scale=0.5,
        box_noise_scale=1.0,
        group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100)),
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            match_costs=[
                dict(type='BinaryFocalLossCost', weight=2.0),
                dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                dict(type='IoUCost', iou_mode='giou', weight=2.0),
            ])),
    test_cfg=dict(max_per_img=300))
# Optimizer: AdamW with gradient clipping. Parameters whose name matches
# `backbone` use a 0.1x learning-rate multiplier and `absolute_pos_embed`
# gets a weight-decay multiplier of 0.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.0),
            backbone=dict(lr_mult=0.1))))
# Learning-rate schedule: one x0.1 step at epoch 11. `end` follows the
# file-level `max_epochs` variable (12) instead of repeating the literal,
# so the scheduler range tracks that variable when it is edited.
# NOTE(review): the milestone is still an absolute epoch index; revisit it
# if `max_epochs` is changed.
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[11],
        gamma=0.1),
]
resume = False
test_cfg = dict(type='TestLoop')
# Test dataloader and metric; paths and batch size come from the variables
# defined at the top of this file.
test_dataloader = dict(
    batch_size=test_batch_size,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file=val_anno,
        data_prefix=dict(img=val_image_dir),
        test_mode=True,
        return_classes=True,
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='FixScaleResize', scale=(800, 1333), keep_ratio=True),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor', 'text', 'custom_entities')),
        ]))
test_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + val_anno,
    metric='bbox',
    format_only=False,
    backend_args=None)
# Test-time pipeline: load, resize to the (800, 1333) scale with the aspect
# ratio kept, load annotations, then pack.
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(type='FixScaleResize', scale=(800, 1333), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'text', 'custom_entities')),
]
# Epoch-based training loop with validation after every epoch. The length
# is taken from the file-level `max_epochs` variable (12) rather than a
# repeated literal, so editing that variable actually changes the run.
train_cfg = dict(
    type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
# Training dataloader; paths and batch size come from the variables defined
# at the top of this file.
train_dataloader = dict(
    batch_size=train_batch_size,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file=train_anno,
        data_prefix=dict(img=train_image_dir),
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        return_classes=True,
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='RandomFlip', prob=0.5),
            dict(
                type='RandomChoice',
                transforms=[
                    # option 1: multi-scale resize only
                    [
                        dict(
                            type='RandomChoiceResize',
                            scales=[(480, 1333), (512, 1333), (544, 1333),
                                    (576, 1333), (608, 1333), (640, 1333),
                                    (672, 1333), (704, 1333), (736, 1333),
                                    (768, 1333), (800, 1333)],
                            keep_ratio=True),
                    ],
                    # option 2: resize, random crop, multi-scale resize
                    [
                        dict(
                            type='RandomChoiceResize',
                            scales=[(400, 4200), (500, 4200), (600, 4200)],
                            keep_ratio=True),
                        dict(
                            type='RandomCrop',
                            crop_type='absolute_range',
                            crop_size=(384, 600),
                            allow_negative_crop=True),
                        dict(
                            type='RandomChoiceResize',
                            scales=[(480, 1333), (512, 1333), (544, 1333),
                                    (576, 1333), (608, 1333), (640, 1333),
                                    (672, 1333), (704, 1333), (736, 1333),
                                    (768, 1333), (800, 1333)],
                            keep_ratio=True),
                    ],
                ]),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor', 'flip', 'flip_direction', 'text',
                           'custom_entities')),
        ]))
# Training pipeline: load, flip with probability 0.5, then pick one of two
# resize/crop options at random, then pack.
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[
            # option 1: multi-scale resize only
            [
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)],
                    keep_ratio=True),
            ],
            # option 2: resize, random crop, multi-scale resize
            [
                dict(
                    type='RandomChoiceResize',
                    scales=[(400, 4200), (500, 4200), (600, 4200)],
                    keep_ratio=True),
                dict(
                    type='RandomCrop',
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)],
                    keep_ratio=True),
            ],
        ]),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'flip', 'flip_direction', 'text',
                   'custom_entities')),
]
# Validation loop, dataloader and metric; paths and batch size come from
# the variables defined at the top of this file.
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=test_batch_size,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        ann_file=val_anno,
        data_prefix=dict(img=val_image_dir),
        test_mode=True,
        return_classes=True,
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='FixScaleResize', scale=(800, 1333), keep_ratio=True),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor', 'text', 'custom_entities')),
        ]))
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + val_anno,
    metric='bbox',
    format_only=False,
    backend_args=None)
# Visualization: a single local backend, listed both stand-alone and
# inside the visualizer definition.
vis_backends = [{'type': 'LocalVisBackend'}]
visualizer = dict(
    type='DetLocalVisualizer',
    name='visualizer',
    vis_backends=[{'type': 'LocalVisBackend'}])
# Inherits the Swin-T COCO fine-tune config, switches the initial
# checkpoint, and overrides the backbone hyper-parameters and the neck
# input channels.
_base_ = ['./grounding_dino_swin-t_finetune_16xb2_1x_coco.py']

load_from = 'https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/groundingdino_swinb_cogcoor_mmdet-55949c9c.pth'  # noqa

model = dict(
    type='GroundingDINO',
    backbone=dict(
        embed_dims=128,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        window_size=12,
        pretrain_img_size=384,
        patch_norm=True,
        drop_path_rate=0.3,
    ),
    neck={'in_channels': [256, 512, 1024]},
)
# Inherits the Swin-T pretrain config and overrides the backbone
# hyper-parameters and the neck input channels.
_base_ = ['./grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py']

model = dict(
    type='GroundingDINO',
    backbone=dict(
        embed_dims=128,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        window_size=12,
        pretrain_img_size=384,
        patch_norm=True,
        drop_path_rate=0.3,
    ),
    neck={'in_channels': [256, 512, 1024]},
)
# COCO fine-tuning config: builds on the shared COCO dataset, 1x schedule
# and default runtime base configs and starts from the checkpoint in
# `load_from`.
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py',
]

load_from = 'https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/groundingdino_swint_ogc_mmdet-822d7e9d.pth'  # noqa
lang_model_name = 'bert-base-uncased'

model = dict(
    type='GroundingDINO',
    num_queries=900,
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_mask=False,
    ),
    language_model=dict(
        type='BertModel',
        name=lang_model_name,
        add_pooling_layer=False,
        pad_to_max=False,
        use_sub_sentence_represent=True,
        special_tokens_list=['[CLS]', '[SEP]', '.', '?'],
    ),
    backbone=dict(
        type='SwinTransformer',
        embed_dims=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        out_indices=(1, 2, 3),
        with_cp=True,
        convert_weights=False),
    neck=dict(
        type='ChannelMapper',
        in_channels=[192, 384, 768],
        out_channels=256,
        kernel_size=1,
        num_outs=4,
        bias=True,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32)),
    encoder=dict(
        num_layers=6,
        num_cp=6,
        # settings of the visual layers
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
        # settings of the text layers
        text_layer_cfg=dict(
            self_attn_cfg=dict(num_heads=4, embed_dims=256, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=1024, ffn_drop=0.0)),
        # settings of the fusion layers
        fusion_layer_cfg=dict(
            v_dim=256,
            l_dim=256,
            embed_dim=1024,
            num_heads=4,
            init_values=1e-4),
    ),
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        post_norm_cfg=None,
        layer_cfg=dict(
            # self attention over the queries
            self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            # cross attention, query to text
            cross_attn_text_cfg=dict(
                embed_dims=256, num_heads=8, dropout=0.0),
            # cross attention, query to image
            cross_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0))),
    positional_encoding=dict(
        num_feats=128, normalize=True, offset=0.0, temperature=20),
    bbox_head=dict(
        type='GroundingDINOHead',
        num_classes=80,
        sync_cls_avg_factor=True,
        contrastive_cfg=dict(max_text_len=256, log_scale=0.0, bias=False),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    dn_cfg=dict(  # TODO: Move to model.train_cfg ?
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # 0.4 for DN-DETR
        group_cfg=dict(dynamic=True, num_groups=None,
                       num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            match_costs=[
                dict(type='BinaryFocalLossCost', weight=2.0),
                dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                dict(type='IoUCost', iou_mode='giou', weight=2.0),
            ])),
    test_cfg=dict(max_per_img=300))
# dataset settings
# Training pipeline: load, flip with probability 0.5, then pick one of two
# resize/crop options at random, then pack.
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[
            # option 1: multi-scale resize only
            [
                dict(
                    type='RandomChoiceResize',
                    keep_ratio=True,
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)]),
            ],
            # option 2: resize, random crop, multi-scale resize
            [
                dict(
                    type='RandomChoiceResize',
                    keep_ratio=True,
                    # The aspect ratio of every image in the training set
                    # is < 7; this follows the original implementation.
                    scales=[(400, 4200), (500, 4200), (600, 4200)]),
                dict(
                    type='RandomCrop',
                    crop_size=(384, 600),
                    crop_type='absolute_range',
                    allow_negative_crop=True),
                dict(
                    type='RandomChoiceResize',
                    keep_ratio=True,
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)]),
            ],
        ]),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'flip', 'flip_direction', 'text',
                   'custom_entities')),
]

# Test-time pipeline: fixed (800, 1333) scale with the aspect ratio kept.
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='FixScaleResize', keep_ratio=True, scale=(800, 1333)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'text', 'custom_entities')),
]

train_dataloader = dict(
    dataset=dict(
        return_classes=True,
        pipeline=train_pipeline,
        filter_cfg=dict(filter_empty_gt=False)))
val_dataloader = dict(
    dataset=dict(return_classes=True, pipeline=test_pipeline))
test_dataloader = val_dataloader
# Optimizer: AdamW with gradient clipping. Parameters whose name matches
# `backbone` use a 0.1x learning-rate multiplier and `absolute_pos_embed`
# gets a weight-decay multiplier of 0.
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.),
            backbone=dict(lr_mult=0.1))))

# learning policy: one x0.1 step at epoch 11 of a 12-epoch schedule
max_epochs = 12
param_scheduler = [
    dict(
        type='MultiStepLR',
        by_epoch=True,
        begin=0,
        end=max_epochs,
        milestones=[11],
        gamma=0.1),
]

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (16 GPUs) x (2 samples per GPU)
auto_scale_lr = {'base_batch_size': 32}
# Fine-tuning on a single-class ("cat") dataset on top of the Swin-T COCO
# fine-tune config.
_base_ = 'grounding_dino_swin-t_finetune_16xb2_1x_coco.py'

data_root = 'data/cat/'
class_name = ('cat', )
num_classes = len(class_name)
metainfo = dict(palette=[(220, 20, 60)], classes=class_name)

model = dict(bbox_head=dict(num_classes=num_classes))

train_dataloader = dict(
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        data_prefix=dict(img='images/'),
        ann_file='annotations/trainval.json'))
val_dataloader = dict(
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        data_prefix=dict(img='images/'),
        ann_file='annotations/test.json'))
test_dataloader = val_dataloader

val_evaluator = dict(ann_file=f'{data_root}annotations/test.json')
test_evaluator = val_evaluator

# 20-epoch schedule with validation after every epoch
max_epoch = 20
train_cfg = dict(val_interval=1, max_epochs=max_epoch)
default_hooks = dict(
    logger=dict(type='LoggerHook', interval=5),
    checkpoint=dict(save_best='auto', max_keep_ckpts=1, interval=1))

param_scheduler = [
    # iteration-based (by_epoch=False) linear ramp over begin=0 .. end=30
    dict(
        type='LinearLR',
        by_epoch=False,
        begin=0,
        end=30,
        start_factor=0.001),
    # epoch-based x0.1 step at epoch 15
    dict(
        type='MultiStepLR',
        by_epoch=True,
        begin=0,
        end=max_epoch,
        milestones=[15],
        gamma=0.1),
]

optim_wrapper = dict(
    optimizer=dict(lr=0.00005),
    paramwise_cfg=dict(
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.),
            backbone=dict(lr_mult=0.1),
            # a multiplier of 0 for parameters matching `language_model`
            language_model=dict(lr_mult=0))))

auto_scale_lr = {'base_batch_size': 16}
# Swin-T Grounding DINO config with `train_cfg=None`; builds on the shared
# COCO dataset, 1x schedule and default runtime base configs.
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py',
]

lang_model_name = 'bert-base-uncased'

model = dict(
    type='GroundingDINO',
    num_queries=900,
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_mask=False,
    ),
    language_model=dict(
        type='BertModel',
        name=lang_model_name,
        add_pooling_layer=True,
        pad_to_max=False,
        use_sub_sentence_represent=True,
        special_tokens_list=['[CLS]', '[SEP]', '.', '?'],
    ),
    backbone=dict(
        type='SwinTransformer',
        embed_dims=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        out_indices=(1, 2, 3),
        with_cp=False,
        convert_weights=False),
    neck=dict(
        type='ChannelMapper',
        in_channels=[192, 384, 768],
        out_channels=256,
        kernel_size=1,
        num_outs=4,
        bias=True,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32)),
    encoder=dict(
        num_layers=6,
        # settings of the visual layers
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
        # settings of the text layers
        text_layer_cfg=dict(
            self_attn_cfg=dict(num_heads=4, embed_dims=256, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=1024, ffn_drop=0.0)),
        # settings of the fusion layers
        fusion_layer_cfg=dict(
            v_dim=256,
            l_dim=256,
            embed_dim=1024,
            num_heads=4,
            init_values=1e-4),
    ),
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        post_norm_cfg=None,
        layer_cfg=dict(
            # self attention over the queries
            self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            # cross attention, query to text
            cross_attn_text_cfg=dict(
                embed_dims=256, num_heads=8, dropout=0.0),
            # cross attention, query to image
            cross_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256, feedforward_channels=2048, ffn_drop=0.0))),
    positional_encoding=dict(
        num_feats=128, normalize=True, offset=0.0, temperature=20),
    bbox_head=dict(
        type='GroundingDINOHead',
        num_classes=80,
        sync_cls_avg_factor=True,
        contrastive_cfg=dict(max_text_len=256),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type='L1Loss', loss_weight=5.0)),
    dn_cfg=dict(  # TODO: Move to model.train_cfg ?
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # 0.4 for DN-DETR
        group_cfg=dict(dynamic=True, num_groups=None,
                       num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=None,
    test_cfg=dict(max_per_img=300))
# Test-time pipeline; both decoding and resizing select the pillow backend.
test_pipeline = [
    dict(
        type='LoadImageFromFile',
        imdecode_backend='pillow',
        backend_args=None),
    dict(
        type='FixScaleResize',
        backend='pillow',
        keep_ratio=True,
        scale=(800, 1333)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'text', 'custom_entities',
                   'tokens_positive')),
]

# Validation and test share one dataloader definition.
val_dataloader = dict(
    dataset=dict(return_classes=True, pipeline=test_pipeline))
test_dataloader = val_dataloader
# Inherits the Swin-T zero-shot LVIS config and overrides the backbone
# hyper-parameters and the neck input channels.
_base_ = './grounding_dino_swin-t_pretrain_zeroshot_lvis.py'

model = dict(
    type='GroundingDINO',
    backbone=dict(
        embed_dims=128,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        window_size=12,
        pretrain_img_size=384,
        patch_norm=True,
        drop_path_rate=0.3,
    ),
    neck={'in_channels': [256, 512, 1024]},
)
# Inherits the Swin-T zero-shot mini-LVIS config and overrides the backbone
# hyper-parameters and the neck input channels.
_base_ = './grounding_dino_swin-t_pretrain_zeroshot_mini-lvis.py'

model = dict(
    type='GroundingDINO',
    backbone=dict(
        embed_dims=128,
        depths=[2, 2, 18, 2],
        num_heads=[4, 8, 16, 32],
        window_size=12,
        pretrain_img_size=384,
        patch_norm=True,
        drop_path_rate=0.3,
    ),
    neck={'in_channels': [256, 512, 1024]},
)
# LVIS evaluation on top of the Swin-T pretrain config, using the
# `lvis_od_val.json` annotation file.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'

model = dict(test_cfg=dict(chunked_size=40, max_per_img=300))

dataset_type = 'LVISV1Dataset'
data_root = 'data/coco/'

val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img=''),
        ann_file='annotations/lvis_od_val.json'))
test_dataloader = val_dataloader

# NOTE: the original author flags a `numpy < 1.24.0` requirement here.
val_evaluator = dict(
    type='LVISFixedAPMetric',
    ann_file=f'{data_root}annotations/lvis_od_val.json',
    _delete_=True)
test_evaluator = val_evaluator
# LVIS evaluation on top of the Swin-T pretrain config, using the
# `lvis_v1_minival_inserted_image_name.json` annotation file.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'

model = dict(test_cfg=dict(chunked_size=40, max_per_img=300))

dataset_type = 'LVISV1Dataset'
data_root = 'data/coco/'

val_dataloader = dict(
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img=''),
        ann_file='annotations/lvis_v1_minival_inserted_image_name.json'))
test_dataloader = val_dataloader

# NOTE: the original author flags a `numpy < 1.24.0` requirement here.
val_evaluator = dict(
    type='LVISFixedAPMetric',
    ann_file=(
        f'{data_root}annotations/lvis_v1_minival_inserted_image_name.json'),
    _delete_=True)
test_evaluator = val_evaluator
Collections:
- Name: Grounding DINO
Metadata:
Training Data: Objects365, GoldG, CC3M and COCO
Training Techniques:
- AdamW
- Multi Scale Train
- Gradient Clip
Training Resources: 3090 GPUs
Architecture:
- Swin Transformer
- BERT
Paper:
URL: https://arxiv.org/abs/2303.05499
Title: 'Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection'
README: configs/grounding_dino/README.md
Code:
URL:
Version: v3.0.0
Models:
- Name: grounding_dino_swin-t_pretrain_obj365_goldg_cap4m
In Collection: Grounding DINO
Config: configs/grounding_dino/grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 48.5
Weights: https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/groundingdino_swint_ogc_mmdet-822d7e9d.pth
- Name: grounding_dino_swin-b_pretrain_mixeddata
In Collection: Grounding DINO
Config: configs/grounding_dino/grounding_dino_swin-b_pretrain_mixeddata.py
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 56.9
Weights: https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/groundingdino_swinb_cogcoor_mmdet-55949c9c.pth
- Name: grounding_dino_swin-t_finetune_16xb2_1x_coco
In Collection: Grounding DINO
Config: configs/grounding_dino/grounding_dino_swin-t_finetune_16xb2_1x_coco.py
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 58.1
Weights: https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/grounding_dino_swin-t_finetune_16xb2_1x_coco/grounding_dino_swin-t_finetune_16xb2_1x_coco_20230921_152544-5f234b20.pth
- Name: grounding_dino_swin-b_finetune_16xb2_1x_coco
In Collection: Grounding DINO
Config: configs/grounding_dino/grounding_dino_swin-b_finetune_16xb2_1x_coco.py
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 59.7
Weights: https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/grounding_dino_swin-b_finetune_16xb2_1x_coco/grounding_dino_swin-b_finetune_16xb2_1x_coco_20230921_153201-f219e0c0.pth
- Name: grounding_dino_r50_scratch_8xb2_1x_coco
In Collection: Grounding DINO
Config: configs/grounding_dino/grounding_dino_r50_scratch_8xb2_1x_coco.py
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 48.9
Weights: https://download.openmmlab.com/mmdetection/v3.0/grounding_dino/grounding_dino_r50_scratch_8xb2_1x_coco/grounding_dino_r50_scratch_1x_coco-fe0002f2.pth
# Evaluation config built on top of the Swin-B mixed-data pretrain config.
# The settings below are shared by every per-dataset section that follows.
_base_ = '../grounding_dino_swin-b_pretrain_mixeddata.py'
dataset_type = 'CocoDataset'
data_root = 'data/odinw/'
# Take the test pipeline from the base config and overwrite the `meta_keys`
# of its last transform in place so that `caption_prompt` is listed too.
# NOTE(review): this mutates the object obtained from `_base_`; presumably
# intentional, since the dataset sections below all reuse
# `base_test_pipeline` -- confirm before reordering these two statements.
base_test_pipeline = _base_.test_pipeline
base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape',
'img_shape', 'scale_factor', 'text',
'custom_entities', 'caption_prompt')
# ---------------------1 AerialMaritimeDrone---------------------#
# Five-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = {'classes': class_name}
_data_root = f'{data_root}AerialMaritimeDrone/large/'
dataset_AerialMaritimeDrone = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_AerialMaritimeDrone = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------2 Aquarium---------------------#
# Seven-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish',
              'stingray')
metainfo = {'classes': class_name}
_data_root = f'{data_root}Aquarium/Aquarium Combined.v2-raw-1024.coco/'
# No caption prompt is active. The disabled alternative kept by the author:
# caption_prompt = {
#     'penguin': {
#         'suffix': ', which is black and white'
#     },
#     'puffin': {
#         'suffix': ' with orange beaks'
#     },
#     'stingray': {
#         'suffix': ' which is flat and round'
#     },
# }
caption_prompt = None
dataset_Aquarium = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    caption_prompt=caption_prompt,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_Aquarium = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------3 CottontailRabbits---------------------#
# Single-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('Cottontail-Rabbit', )
metainfo = {'classes': class_name}
_data_root = f'{data_root}CottontailRabbits/'
# No caption prompt is active. The disabled alternative kept by the author:
# caption_prompt = {'Cottontail-Rabbit': {'name': 'rabbit'}}
caption_prompt = None
dataset_CottontailRabbits = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    caption_prompt=caption_prompt,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_CottontailRabbits = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------4 EgoHands---------------------#
# Single-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('hand', )
metainfo = {'classes': class_name}
_data_root = f'{data_root}EgoHands/generic/'
# No caption prompt is active. The disabled alternative kept by the author:
# caption_prompt = {'hand': {'suffix': ' of a person'}}
caption_prompt = None
dataset_EgoHands = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    caption_prompt=caption_prompt,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_EgoHands = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------5 NorthAmericaMushrooms---------------------#
# Two-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('CoW', 'chanterelle')
metainfo = {'classes': class_name}
_data_root = f'{data_root}NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/'  # noqa
# No caption prompt is active. The disabled alternative kept by the author:
# caption_prompt = {
#     'CoW': {
#         'name': 'flat mushroom'
#     },
#     'chanterelle': {
#         'name': 'yellow mushroom'
#     }
# }
caption_prompt = None
dataset_NorthAmericaMushrooms = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    caption_prompt=caption_prompt,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_NorthAmericaMushrooms = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------6 Packages---------------------#
# Single-class dataset evaluated on its `valid/` split with COCO bbox metric.
class_name = ('package', )
metainfo = {'classes': class_name}
_data_root = f'{data_root}Packages/Raw/'
# No caption prompt is active. The disabled alternative kept by the author:
# caption_prompt = {
#     'package': {
#         'prefix': 'there is a ',
#         'suffix': ' on the porch'
#     }
# }
caption_prompt = None
dataset_Packages = dict(
    type=dataset_type,
    data_root=_data_root,
    data_prefix=dict(img='valid/'),
    ann_file='valid/annotations_without_background.json',
    metainfo=metainfo,
    caption_prompt=caption_prompt,
    pipeline=base_test_pipeline,
    return_classes=True,
    test_mode=True)
val_evaluator_Packages = dict(
    type='CocoMetric',
    metric='bbox',
    ann_file=f'{_data_root}valid/annotations_without_background.json')
# ---------------------7 PascalVOC---------------------#
# Twenty-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
              'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
              'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
              'tvmonitor')
metainfo = {'classes': class_name}
_data_root = data_root + 'PascalVOC/'
dataset_PascalVOC = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_PascalVOC = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------8 pistols---------------------#
# Single-class entry. Unlike most sections, the annotation file sits directly
# under the data root and the image prefix is empty.
class_name = ('pistol', )
metainfo = {'classes': class_name}
_data_root = data_root + 'pistols/export/'
dataset_pistols = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'val_annotations_without_background.json',
    'data_prefix': {'img': ''},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_pistols = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'val_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------9 pothole---------------------#
# Single-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('pothole', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'pothole/'
# Prompting is disabled by default; swap in the dict below to enable it.
caption_prompt = None
# caption_prompt = {
#     'pothole': {
#         'prefix': 'there are some ',
#         'name': 'holes',
#         'suffix': ' on the road'
#     }
# }
dataset_pothole = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_root=_data_root,
    ann_file='valid/annotations_without_background.json',
    data_prefix=dict(img='valid/'),
    pipeline=base_test_pipeline,
    # Forward the prompt like the other prompt-capable sections do, so that
    # enabling `caption_prompt` above actually reaches the dataset.
    caption_prompt=caption_prompt,
    test_mode=True,
    return_classes=True)
val_evaluator_pothole = dict(
    type='CocoMetric',
    ann_file=_data_root + 'valid/annotations_without_background.json',
    metric='bbox')
# ---------------------10 Raccoon---------------------#
# Single-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('raccoon', )
metainfo = {'classes': class_name}
_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/'
dataset_Raccoon = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_Raccoon = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------11 ShellfishOpenImages---------------------#
# Three-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('Crab', 'Lobster', 'Shrimp')
metainfo = {'classes': class_name}
_data_root = data_root + 'ShellfishOpenImages/raw/'
dataset_ShellfishOpenImages = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_ShellfishOpenImages = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------12 thermalDogsAndPeople---------------------#
# Two-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('dog', 'person')
metainfo = {'classes': class_name}
_data_root = data_root + 'thermalDogsAndPeople/'
dataset_thermalDogsAndPeople = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_thermalDogsAndPeople = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------13 VehiclesOpenImages---------------------#
# Five-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck')
metainfo = {'classes': class_name}
_data_root = data_root + 'VehiclesOpenImages/416x416/'
dataset_VehiclesOpenImages = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_VehiclesOpenImages = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# --------------------- Config---------------------#
# Label for each dataset, listed in the same order as the `datasets` and
# `metrics` lists that follow.
dataset_prefixes = [
    'AerialMaritimeDrone',
    'Aquarium',
    'CottontailRabbits',
    'EgoHands',
    'NorthAmericaMushrooms',
    'Packages',
    'PascalVOC',
    'pistols',
    'pothole',
    'Raccoon',
    'ShellfishOpenImages',
    'thermalDogsAndPeople',
    'VehiclesOpenImages',
]
# Dataset entries and their evaluators, index-aligned with `dataset_prefixes`.
datasets = [
    dataset_AerialMaritimeDrone,
    dataset_Aquarium,
    dataset_CottontailRabbits,
    dataset_EgoHands,
    dataset_NorthAmericaMushrooms,
    dataset_Packages,
    dataset_PascalVOC,
    dataset_pistols,
    dataset_pothole,
    dataset_Raccoon,
    dataset_ShellfishOpenImages,
    dataset_thermalDogsAndPeople,
    dataset_VehiclesOpenImages,
]
metrics = [
    val_evaluator_AerialMaritimeDrone,
    val_evaluator_Aquarium,
    val_evaluator_CottontailRabbits,
    val_evaluator_EgoHands,
    val_evaluator_NorthAmericaMushrooms,
    val_evaluator_Packages,
    val_evaluator_PascalVOC,
    val_evaluator_pistols,
    val_evaluator_pothole,
    val_evaluator_Raccoon,
    val_evaluator_ShellfishOpenImages,
    val_evaluator_thermalDogsAndPeople,
    val_evaluator_VehiclesOpenImages,
]
# -------------------------------------------------#
# Validation and test share one dataloader and one evaluator object.
# `_delete_` is the config system's marker for replacing the inherited entry
# instead of merging into it.
val_dataloader = {
    'dataset': {
        '_delete_': True,
        'type': 'ConcatDataset',
        'datasets': datasets,
    },
}
test_dataloader = val_dataloader
val_evaluator = {
    '_delete_': True,
    'type': 'MultiDatasetsEvaluator',
    'metrics': metrics,
    'dataset_prefixes': dataset_prefixes,
}
test_evaluator = val_evaluator
# Config this file builds on; `_base_.test_pipeline` below is read from it.
_base_ = '../grounding_dino_swin-b_pretrain_mixeddata.py'
# Dataset type and root directory shared by every dataset section below.
dataset_type = 'CocoDataset'
data_root = 'data/odinw/'
# `base_test_pipeline` is bound to the base config's test pipeline, and the
# `meta_keys` of its last step are then overwritten so that 'caption_prompt'
# is included in the listed keys.
# NOTE(review): the item assignment modifies the pipeline object in place, so
# sections that reference `_base_.test_pipeline` presumably see the same
# `meta_keys` — confirm against the config loader.
base_test_pipeline = _base_.test_pipeline
base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape',
'img_shape', 'scale_factor', 'text',
'custom_entities', 'caption_prompt')
# ---------------------1 AerialMaritimeDrone_large---------------------#
# Five-class entry for the `large/` variant, read from the `valid/`
# annotations and paired with a bbox CocoMetric evaluator.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = {'classes': class_name}
_data_root = data_root + 'AerialMaritimeDrone/large/'
dataset_AerialMaritimeDrone_large = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_AerialMaritimeDrone_large = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------2 AerialMaritimeDrone_tiled---------------------#
# Five-class entry for the `tiled/` variant, read from the `valid/`
# annotations and paired with a bbox CocoMetric evaluator.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = {'classes': class_name}
_data_root = data_root + 'AerialMaritimeDrone/tiled/'
dataset_AerialMaritimeDrone_tiled = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_AerialMaritimeDrone_tiled = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------3 AmericanSignLanguageLetters---------------------#
# One class per letter A-Z, read from the `valid/` annotations and paired
# with a bbox CocoMetric evaluator.
class_name = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
              'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z')
metainfo = {'classes': class_name}
_data_root = data_root + 'AmericanSignLanguageLetters/American Sign Language Letters.v1-v1.coco/'  # noqa
dataset_AmericanSignLanguageLetters = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_AmericanSignLanguageLetters = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------4 Aquarium---------------------#
# Seven-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish',
              'stingray')
metainfo = {'classes': class_name}
_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/'
dataset_Aquarium = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_Aquarium = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------5 BCCD---------------------#
# Three-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('Platelets', 'RBC', 'WBC')
metainfo = {'classes': class_name}
_data_root = data_root + 'BCCD/BCCD.v3-raw.coco/'
dataset_BCCD = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_BCCD = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------6 boggleBoards---------------------#
# Letter-tile classes; the names (including 'o ' and 't\\') are kept exactly
# as given. The annotation file sits directly under the data root and the
# image prefix is empty.
class_name = ('Q', 'a', 'an', 'b', 'c', 'd', 'e', 'er', 'f', 'g', 'h', 'he',
              'i', 'in', 'j', 'k', 'l', 'm', 'n', 'o', 'o ', 'p', 'q', 'qu',
              'r', 's', 't', 't\\', 'th', 'u', 'v', 'w', 'wild', 'x', 'y', 'z')
metainfo = {'classes': class_name}
_data_root = data_root + 'boggleBoards/416x416AutoOrient/export/'
dataset_boggleBoards = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'val_annotations_without_background.json',
    'data_prefix': {'img': ''},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_boggleBoards = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'val_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------7 brackishUnderwater---------------------#
# Six-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('crab', 'fish', 'jellyfish', 'shrimp', 'small_fish', 'starfish')
metainfo = {'classes': class_name}
_data_root = data_root + 'brackishUnderwater/960x540/'
dataset_brackishUnderwater = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_brackishUnderwater = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------8 ChessPieces---------------------#
# Chess-piece classes (the first class name is a blank string, kept as
# given). This section reads `new_annotations_without_background.json`
# rather than the file name used by most other sections.
class_name = (' ', 'black bishop', 'black king', 'black knight', 'black pawn',
              'black queen', 'black rook', 'white bishop', 'white king',
              'white knight', 'white pawn', 'white queen', 'white rook')
metainfo = {'classes': class_name}
_data_root = data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/'
dataset_ChessPieces = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/new_annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_ChessPieces = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/new_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------9 CottontailRabbits---------------------#
# Single-class entry. This section reads
# `new_annotations_without_background.json` rather than the file name used by
# most other sections.
class_name = ('rabbit', )
metainfo = {'classes': class_name}
_data_root = data_root + 'CottontailRabbits/'
dataset_CottontailRabbits = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/new_annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_CottontailRabbits = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/new_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------10 dice---------------------#
# Six classes named '1'..'6'. The annotation file sits directly under the
# data root and the image prefix is empty.
class_name = ('1', '2', '3', '4', '5', '6')
metainfo = {'classes': class_name}
_data_root = data_root + 'dice/mediumColor/export/'
dataset_dice = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'val_annotations_without_background.json',
    'data_prefix': {'img': ''},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_dice = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'val_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------11 DroneControl---------------------#
# Eight-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('follow', 'follow_hand', 'land', 'land_hand', 'null', 'object',
              'takeoff', 'takeoff-hand')
metainfo = {'classes': class_name}
_data_root = data_root + 'DroneControl/Drone Control.v3-raw.coco/'
dataset_DroneControl = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_DroneControl = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------12 EgoHands_generic---------------------#
# Single-class entry for the `generic/` variant. A caption prompt is defined
# but deliberately not passed to the dataset (see the NOTE inside the entry).
class_name = ('hand', )
metainfo = {'classes': class_name}
_data_root = data_root + 'EgoHands/generic/'
caption_prompt = {'hand': {'suffix': ' of a person'}}
dataset_EgoHands_generic = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    # NOTE w. prompt 0.548; wo. prompt 0.764
    # 'caption_prompt': caption_prompt,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_EgoHands_generic = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------13 EgoHands_specific---------------------#
# Four-class entry for the `specific/` variant, read from the `valid/`
# annotations and paired with a bbox CocoMetric evaluator.
class_name = ('myleft', 'myright', 'yourleft', 'yourright')
metainfo = {'classes': class_name}
_data_root = data_root + 'EgoHands/specific/'
dataset_EgoHands_specific = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_EgoHands_specific = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------14 HardHatWorkers---------------------#
# Three-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('head', 'helmet', 'person')
metainfo = {'classes': class_name}
_data_root = data_root + 'HardHatWorkers/raw/'
dataset_HardHatWorkers = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_HardHatWorkers = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------15 MaskWearing---------------------#
# Two-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('mask', 'no-mask')
metainfo = {'classes': class_name}
_data_root = data_root + 'MaskWearing/raw/'
dataset_MaskWearing = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_MaskWearing = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------16 MountainDewCommercial---------------------#
# Single-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('bottle', )
metainfo = {'classes': class_name}
_data_root = data_root + 'MountainDewCommercial/'
dataset_MountainDewCommercial = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_MountainDewCommercial = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------17 NorthAmericaMushrooms---------------------#
# Two-class entry using descriptive class names. This section reads
# `new_annotations_without_background.json` rather than the file name used by
# most other sections.
class_name = ('flat mushroom', 'yellow mushroom')
metainfo = {'classes': class_name}
_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/'  # noqa
dataset_NorthAmericaMushrooms = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/new_annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_NorthAmericaMushrooms = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/new_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------18 openPoetryVision---------------------#
# Classes are typeface names. The entry is read from the `valid/` annotations
# and paired with a bbox CocoMetric evaluator.
class_name = ('American Typewriter', 'Andale Mono', 'Apple Chancery', 'Arial',
              'Avenir', 'Baskerville', 'Big Caslon', 'Bradley Hand',
              'Brush Script MT', 'Chalkboard', 'Comic Sans MS', 'Copperplate',
              'Courier', 'Didot', 'Futura', 'Geneva', 'Georgia', 'Gill Sans',
              'Helvetica', 'Herculanum', 'Impact', 'Kefa', 'Lucida Grande',
              'Luminari', 'Marker Felt', 'Menlo', 'Monaco', 'Noteworthy',
              'Optima', 'PT Sans', 'PT Serif', 'Palatino', 'Papyrus',
              'Phosphate', 'Rockwell', 'SF Pro', 'SignPainter', 'Skia',
              'Snell Roundhand', 'Tahoma', 'Times New Roman', 'Trebuchet MS',
              'Verdana')
metainfo = {'classes': class_name}
_data_root = data_root + 'openPoetryVision/512x512/'
dataset_openPoetryVision = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_openPoetryVision = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------19 OxfordPets_by_breed---------------------#
# One class per `cat-*` / `dog-*` breed name. The entry is read from the
# `valid/` annotations and paired with a bbox CocoMetric evaluator.
class_name = ('cat-Abyssinian', 'cat-Bengal', 'cat-Birman', 'cat-Bombay',
              'cat-British_Shorthair', 'cat-Egyptian_Mau', 'cat-Maine_Coon',
              'cat-Persian', 'cat-Ragdoll', 'cat-Russian_Blue', 'cat-Siamese',
              'cat-Sphynx', 'dog-american_bulldog',
              'dog-american_pit_bull_terrier', 'dog-basset_hound',
              'dog-beagle', 'dog-boxer', 'dog-chihuahua',
              'dog-english_cocker_spaniel', 'dog-english_setter',
              'dog-german_shorthaired', 'dog-great_pyrenees', 'dog-havanese',
              'dog-japanese_chin', 'dog-keeshond', 'dog-leonberger',
              'dog-miniature_pinscher', 'dog-newfoundland', 'dog-pomeranian',
              'dog-pug', 'dog-saint_bernard', 'dog-samoyed',
              'dog-scottish_terrier', 'dog-shiba_inu',
              'dog-staffordshire_bull_terrier', 'dog-wheaten_terrier',
              'dog-yorkshire_terrier')
metainfo = {'classes': class_name}
_data_root = data_root + 'OxfordPets/by-breed/'  # noqa
dataset_OxfordPets_by_breed = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_OxfordPets_by_breed = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------20 OxfordPets_by_species---------------------#
# Two-class entry for the `by-species/` variant, read from the `valid/`
# annotations and paired with a bbox CocoMetric evaluator.
class_name = ('cat', 'dog')
metainfo = {'classes': class_name}
_data_root = data_root + 'OxfordPets/by-species/'  # noqa
dataset_OxfordPets_by_species = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_OxfordPets_by_species = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------21 PKLot---------------------#
# Two-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('space-empty', 'space-occupied')
metainfo = {'classes': class_name}
_data_root = data_root + 'PKLot/640/'  # noqa
dataset_PKLot = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_PKLot = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------22 Packages---------------------#
# Single-class entry that passes a prefix/suffix caption prompt to the
# dataset (see the NOTE on the prompt key for the recorded scores).
class_name = ('package', )
metainfo = {'classes': class_name}
_data_root = data_root + 'Packages/Raw/'
caption_prompt = {
    'package': {
        'prefix': 'there is a ',
        'suffix': ' on the porch'
    }
}
dataset_Packages = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': base_test_pipeline,
    # NOTE w. prompt 0.728; wo. prompt 0.670
    'caption_prompt': caption_prompt,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_Packages = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------23 PascalVOC---------------------#
# Twenty-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
              'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
              'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
              'tvmonitor')
metainfo = {'classes': class_name}
_data_root = data_root + 'PascalVOC/'
dataset_PascalVOC = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_PascalVOC = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------24 pistols---------------------#
# Single-class entry. The annotation file sits directly under the data root
# and the image prefix is empty.
class_name = ('pistol', )
metainfo = {'classes': class_name}
_data_root = data_root + 'pistols/export/'
dataset_pistols = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'val_annotations_without_background.json',
    'data_prefix': {'img': ''},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_pistols = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'val_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------25 plantdoc---------------------#
# Leaf classes, with names kept exactly as given. The entry is read from the
# `valid/` annotations and paired with a bbox CocoMetric evaluator.
class_name = ('Apple Scab Leaf', 'Apple leaf', 'Apple rust leaf',
              'Bell_pepper leaf', 'Bell_pepper leaf spot', 'Blueberry leaf',
              'Cherry leaf', 'Corn Gray leaf spot', 'Corn leaf blight',
              'Corn rust leaf', 'Peach leaf', 'Potato leaf',
              'Potato leaf early blight', 'Potato leaf late blight',
              'Raspberry leaf', 'Soyabean leaf', 'Soybean leaf',
              'Squash Powdery mildew leaf', 'Strawberry leaf',
              'Tomato Early blight leaf', 'Tomato Septoria leaf spot',
              'Tomato leaf', 'Tomato leaf bacterial spot',
              'Tomato leaf late blight', 'Tomato leaf mosaic virus',
              'Tomato leaf yellow virus', 'Tomato mold leaf',
              'Tomato two spotted spider mites leaf', 'grape leaf',
              'grape leaf black rot')
metainfo = {'classes': class_name}
_data_root = data_root + 'plantdoc/416x416/'
dataset_plantdoc = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_plantdoc = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------26 pothole---------------------#
# Single-class entry. A caption prompt is defined but deliberately not passed
# to the dataset (see the NOTE inside the entry).
class_name = ('pothole', )
metainfo = {'classes': class_name}
_data_root = data_root + 'pothole/'
caption_prompt = {
    'pothole': {
        'name': 'holes',
        'prefix': 'there are some ',
        'suffix': ' on the road'
    }
}
dataset_pothole = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    # NOTE w. prompt 0.221; wo. prompt 0.478
    # 'caption_prompt': caption_prompt,
    'pipeline': base_test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_pothole = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------27 Raccoon---------------------#
# Single-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('raccoon', )
metainfo = {'classes': class_name}
_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/'
dataset_Raccoon = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_Raccoon = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------28 selfdrivingCar---------------------#
# Eleven-class entry. The annotation file sits directly under the data root
# and the image prefix is empty.
class_name = ('biker', 'car', 'pedestrian', 'trafficLight',
              'trafficLight-Green', 'trafficLight-GreenLeft',
              'trafficLight-Red', 'trafficLight-RedLeft',
              'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck')
metainfo = {'classes': class_name}
_data_root = data_root + 'selfdrivingCar/fixedLarge/export/'
dataset_selfdrivingCar = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'val_annotations_without_background.json',
    'data_prefix': {'img': ''},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_selfdrivingCar = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'val_annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------29 ShellfishOpenImages---------------------#
# Three-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('Crab', 'Lobster', 'Shrimp')
metainfo = {'classes': class_name}
_data_root = data_root + 'ShellfishOpenImages/raw/'
dataset_ShellfishOpenImages = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_ShellfishOpenImages = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------30 ThermalCheetah---------------------#
# Two-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('cheetah', 'human')
metainfo = {'classes': class_name}
_data_root = data_root + 'ThermalCheetah/'
dataset_ThermalCheetah = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_ThermalCheetah = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------31 thermalDogsAndPeople---------------------#
# Two-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('dog', 'person')
metainfo = {'classes': class_name}
_data_root = data_root + 'thermalDogsAndPeople/'
dataset_thermalDogsAndPeople = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_thermalDogsAndPeople = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------32 UnoCards---------------------#
# Fifteen classes named '0'..'14', read from the `valid/` annotations and
# paired with a bbox CocoMetric evaluator.
class_name = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
              '12', '13', '14')
metainfo = {'classes': class_name}
_data_root = data_root + 'UnoCards/raw/'
dataset_UnoCards = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_UnoCards = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------33 VehiclesOpenImages---------------------#
# Five-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck')
metainfo = {'classes': class_name}
_data_root = data_root + 'VehiclesOpenImages/416x416/'
dataset_VehiclesOpenImages = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_VehiclesOpenImages = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------34 WildfireSmoke---------------------#
# Single-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('smoke', )
metainfo = {'classes': class_name}
_data_root = data_root + 'WildfireSmoke/'
dataset_WildfireSmoke = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_WildfireSmoke = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# ---------------------35 websiteScreenshots---------------------#
# Eight-class entry read from the `valid/` annotations, paired with a bbox
# CocoMetric evaluator over the same annotation file.
class_name = ('button', 'field', 'heading', 'iframe', 'image', 'label', 'link',
              'text')
metainfo = {'classes': class_name}
_data_root = data_root + 'websiteScreenshots/'
dataset_websiteScreenshots = {
    'type': dataset_type,
    'metainfo': metainfo,
    'data_root': _data_root,
    'ann_file': 'valid/annotations_without_background.json',
    'data_prefix': {'img': 'valid/'},
    'pipeline': _base_.test_pipeline,
    'test_mode': True,
    'return_classes': True,
}
val_evaluator_websiteScreenshots = {
    'type': 'CocoMetric',
    'ann_file': _data_root + 'valid/annotations_without_background.json',
    'metric': 'bbox',
}
# --------------------- Config---------------------#
# Label for each dataset, listed in the same order as the `datasets` and
# `metrics` lists that follow. Each prefix matches the suffix of the
# corresponding `dataset_*` / `val_evaluator_*` variable; 'Raccoon' was
# previously misspelled as 'Raccoons'.
dataset_prefixes = [
    'AerialMaritimeDrone_large',
    'AerialMaritimeDrone_tiled',
    'AmericanSignLanguageLetters',
    'Aquarium',
    'BCCD',
    'boggleBoards',
    'brackishUnderwater',
    'ChessPieces',
    'CottontailRabbits',
    'dice',
    'DroneControl',
    'EgoHands_generic',
    'EgoHands_specific',
    'HardHatWorkers',
    'MaskWearing',
    'MountainDewCommercial',
    'NorthAmericaMushrooms',
    'openPoetryVision',
    'OxfordPets_by_breed',
    'OxfordPets_by_species',
    'PKLot',
    'Packages',
    'PascalVOC',
    'pistols',
    'plantdoc',
    'pothole',
    'Raccoon',
    'selfdrivingCar',
    'ShellfishOpenImages',
    'ThermalCheetah',
    'thermalDogsAndPeople',
    'UnoCards',
    'VehiclesOpenImages',
    'WildfireSmoke',
    'websiteScreenshots',
]
# Dataset entries and their evaluators, index-aligned with `dataset_prefixes`.
datasets = [
    dataset_AerialMaritimeDrone_large,
    dataset_AerialMaritimeDrone_tiled,
    dataset_AmericanSignLanguageLetters,
    dataset_Aquarium,
    dataset_BCCD,
    dataset_boggleBoards,
    dataset_brackishUnderwater,
    dataset_ChessPieces,
    dataset_CottontailRabbits,
    dataset_dice,
    dataset_DroneControl,
    dataset_EgoHands_generic,
    dataset_EgoHands_specific,
    dataset_HardHatWorkers,
    dataset_MaskWearing,
    dataset_MountainDewCommercial,
    dataset_NorthAmericaMushrooms,
    dataset_openPoetryVision,
    dataset_OxfordPets_by_breed,
    dataset_OxfordPets_by_species,
    dataset_PKLot,
    dataset_Packages,
    dataset_PascalVOC,
    dataset_pistols,
    dataset_plantdoc,
    dataset_pothole,
    dataset_Raccoon,
    dataset_selfdrivingCar,
    dataset_ShellfishOpenImages,
    dataset_ThermalCheetah,
    dataset_thermalDogsAndPeople,
    dataset_UnoCards,
    dataset_VehiclesOpenImages,
    dataset_WildfireSmoke,
    dataset_websiteScreenshots,
]
metrics = [
    val_evaluator_AerialMaritimeDrone_large,
    val_evaluator_AerialMaritimeDrone_tiled,
    val_evaluator_AmericanSignLanguageLetters,
    val_evaluator_Aquarium,
    val_evaluator_BCCD,
    val_evaluator_boggleBoards,
    val_evaluator_brackishUnderwater,
    val_evaluator_ChessPieces,
    val_evaluator_CottontailRabbits,
    val_evaluator_dice,
    val_evaluator_DroneControl,
    val_evaluator_EgoHands_generic,
    val_evaluator_EgoHands_specific,
    val_evaluator_HardHatWorkers,
    val_evaluator_MaskWearing,
    val_evaluator_MountainDewCommercial,
    val_evaluator_NorthAmericaMushrooms,
    val_evaluator_openPoetryVision,
    val_evaluator_OxfordPets_by_breed,
    val_evaluator_OxfordPets_by_species,
    val_evaluator_PKLot,
    val_evaluator_Packages,
    val_evaluator_PascalVOC,
    val_evaluator_pistols,
    val_evaluator_plantdoc,
    val_evaluator_pothole,
    val_evaluator_Raccoon,
    val_evaluator_selfdrivingCar,
    val_evaluator_ShellfishOpenImages,
    val_evaluator_ThermalCheetah,
    val_evaluator_thermalDogsAndPeople,
    val_evaluator_UnoCards,
    val_evaluator_VehiclesOpenImages,
    val_evaluator_WildfireSmoke,
    val_evaluator_websiteScreenshots,
]
# -------------------------------------------------#
# Validate on the concatenation of every config in `datasets`.
# `_delete_=True` is the mmengine marker for replacing the inherited value
# instead of merging into it.
val_dataloader = dict(
dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets))
# Testing reuses the validation dataloader config (same dict object).
test_dataloader = val_dataloader
# Evaluator that receives the per-dataset metric configs together with the
# name prefixes from `dataset_prefixes`.
val_evaluator = dict(
_delete_=True,
type='MultiDatasetsEvaluator',
metrics=metrics,
dataset_prefixes=dataset_prefixes)
# Testing reuses the validation evaluator config (same dict object).
test_evaluator = val_evaluator
# Inherit model and pipeline settings from the Swin-T pre-training config.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'  # noqa
# Dataset class and root directory shared by every section below.
dataset_type = 'CocoDataset'
data_root = 'data/odinw/'
# Take the inherited test pipeline and overwrite the `meta_keys` of its
# last step so that 'caption_prompt' is listed among the packed keys.
base_test_pipeline = _base_.test_pipeline
base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape',
'img_shape', 'scale_factor', 'text',
'custom_entities', 'caption_prompt')
# ---------------------1 AerialMaritimeDrone---------------------#
# Five categories. `class_name`, `metainfo` and `_data_root` are re-bound
# in every section; each dict captures the values current at that point.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = dict(classes=class_name)
# Only the 'large' variant of this subset is used in this config.
_data_root = data_root + 'AerialMaritimeDrone/large/'
dataset_AerialMaritimeDrone = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
test_mode=True,
pipeline=base_test_pipeline,
return_classes=True)
# Bounding-box metric computed against the same annotation file
# (given here as a full path rather than relative to data_root).
val_evaluator_AerialMaritimeDrone = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------2 Aquarium---------------------#
# Seven categories of the Aquarium subset.
class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish',
'stingray')
metainfo = dict(classes=class_name)
_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/'
# Prompt customisation is switched off; the commented mapping below is an
# alternative that attaches a descriptive suffix to three category names.
caption_prompt = None
# caption_prompt = {
# 'penguin': {
# 'suffix': ', which is black and white'
# },
# 'puffin': {
# 'suffix': ' with orange beaks'
# },
# 'stingray': {
# 'suffix': ' which is flat and round'
# },
# }
dataset_Aquarium = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Aquarium = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------3 CottontailRabbits---------------------#
# Single category, kept under its raw label 'Cottontail-Rabbit'.
class_name = ('Cottontail-Rabbit', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'CottontailRabbits/'
# Prompt customisation is switched off; the commented alternative would
# substitute the name 'rabbit' for the raw label.
caption_prompt = None
# caption_prompt = {'Cottontail-Rabbit': {'name': 'rabbit'}}
dataset_CottontailRabbits = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_CottontailRabbits = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------4 EgoHands---------------------#
# Single category 'hand'; only the 'generic' variant of EgoHands is used here.
class_name = ('hand', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'EgoHands/generic/'
# Prompt customisation is switched off; the commented alternative appends
# ' of a person' to the category name.
caption_prompt = None
# caption_prompt = {'hand': {'suffix': ' of a person'}}
dataset_EgoHands = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_EgoHands = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------5 NorthAmericaMushrooms---------------------#
# Two categories, kept under their raw labels.
class_name = ('CoW', 'chanterelle')
metainfo = dict(classes=class_name)
_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/'  # noqa
# Prompt customisation is switched off; the commented alternative would
# substitute descriptive names for the two raw labels.
caption_prompt = None
# caption_prompt = {
# 'CoW': {
# 'name': 'flat mushroom'
# },
# 'chanterelle': {
# 'name': 'yellow mushroom'
# }
# }
dataset_NorthAmericaMushrooms = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_NorthAmericaMushrooms = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------6 Packages---------------------#
# Single category 'package'.
class_name = ('package', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'Packages/Raw/'
# Prompt customisation is switched off; the commented alternative wraps the
# category name in a prefix and a suffix.
caption_prompt = None
# caption_prompt = {
# 'package': {
# 'prefix': 'there is a ',
# 'suffix': ' on the porch'
# }
# }
dataset_Packages = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Packages = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------7 PascalVOC---------------------#
# Twenty categories; no caption prompt is configured for this subset.
class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
'tvmonitor')
metainfo = dict(classes=class_name)
_data_root = data_root + 'PascalVOC/'
dataset_PascalVOC = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_PascalVOC = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------8 pistols---------------------#
# Single category 'pistol'.
class_name = ('pistol', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'pistols/export/'
# Layout differs from most sections: the annotation file is named
# 'val_annotations_...' directly under the data root and the image prefix
# is empty instead of 'valid/'.
dataset_pistols = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='val_annotations_without_background.json',
data_prefix=dict(img=''),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_pistols = dict(
type='CocoMetric',
ann_file=_data_root + 'val_annotations_without_background.json',
metric='bbox')
# ---------------------9 pothole---------------------#
# Single category 'pothole'.
class_name = ('pothole', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'pothole/'
# Prompt customisation is switched off; the commented alternative renames
# the category and wraps it in a prefix and a suffix.
caption_prompt = None
# caption_prompt = {
#     'pothole': {
#         'prefix': 'there are some ',
#         'name': 'holes',
#         'suffix': ' on the road'
#     }
# }
dataset_pothole = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_root=_data_root,
    ann_file='valid/annotations_without_background.json',
    data_prefix=dict(img='valid/'),
    pipeline=base_test_pipeline,
    # Forward the prompt like the other prompt-capable sections of this
    # config do; otherwise enabling the mapping above would silently have
    # no effect. With the current value (None) no prompt is applied.
    caption_prompt=caption_prompt,
    test_mode=True,
    return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_pothole = dict(
    type='CocoMetric',
    ann_file=_data_root + 'valid/annotations_without_background.json',
    metric='bbox')
# ---------------------10 Raccoon---------------------#
# Single category 'raccoon'; no caption prompt is configured.
class_name = ('raccoon', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/'
dataset_Raccoon = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Raccoon = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------11 ShellfishOpenImages---------------------#
# Three categories; no caption prompt is configured.
class_name = ('Crab', 'Lobster', 'Shrimp')
metainfo = dict(classes=class_name)
_data_root = data_root + 'ShellfishOpenImages/raw/'
dataset_ShellfishOpenImages = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_ShellfishOpenImages = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------12 thermalDogsAndPeople---------------------#
# Two categories; no caption prompt is configured.
class_name = ('dog', 'person')
metainfo = dict(classes=class_name)
_data_root = data_root + 'thermalDogsAndPeople/'
dataset_thermalDogsAndPeople = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_thermalDogsAndPeople = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------13 VehiclesOpenImages---------------------#
# Five categories; no caption prompt is configured.
class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck')
metainfo = dict(classes=class_name)
_data_root = data_root + 'VehiclesOpenImages/416x416/'
dataset_VehiclesOpenImages = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_VehiclesOpenImages = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# --------------------- Config---------------------#
# The three lists below hold 13 entries each, in the same dataset order.
# NOTE(review): presumably the multi-dataset evaluator matches them up by
# position -- keep them aligned when adding or removing a dataset.
# Name prefixes handed to the evaluator, one per dataset.
dataset_prefixes = [
'AerialMaritimeDrone', 'Aquarium', 'CottontailRabbits', 'EgoHands',
'NorthAmericaMushrooms', 'Packages', 'PascalVOC', 'pistols', 'pothole',
'Raccoon', 'ShellfishOpenImages', 'thermalDogsAndPeople',
'VehiclesOpenImages'
]
# Dataset configs that are concatenated for evaluation.
datasets = [
dataset_AerialMaritimeDrone, dataset_Aquarium, dataset_CottontailRabbits,
dataset_EgoHands, dataset_NorthAmericaMushrooms, dataset_Packages,
dataset_PascalVOC, dataset_pistols, dataset_pothole, dataset_Raccoon,
dataset_ShellfishOpenImages, dataset_thermalDogsAndPeople,
dataset_VehiclesOpenImages
]
# Evaluator configs, one per dataset.
metrics = [
val_evaluator_AerialMaritimeDrone, val_evaluator_Aquarium,
val_evaluator_CottontailRabbits, val_evaluator_EgoHands,
val_evaluator_NorthAmericaMushrooms, val_evaluator_Packages,
val_evaluator_PascalVOC, val_evaluator_pistols, val_evaluator_pothole,
val_evaluator_Raccoon, val_evaluator_ShellfishOpenImages,
val_evaluator_thermalDogsAndPeople, val_evaluator_VehiclesOpenImages
]
# -------------------------------------------------#
# `_delete_=True` is the mmengine marker for replacing the inherited value
# instead of merging into it.
val_dataloader = dict(
dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets))
# Testing reuses the validation dataloader config (same dict object).
test_dataloader = val_dataloader
val_evaluator = dict(
_delete_=True,
type='MultiDatasetsEvaluator',
metrics=metrics,
dataset_prefixes=dataset_prefixes)
# Testing reuses the validation evaluator config (same dict object).
test_evaluator = val_evaluator
# Inherit model and pipeline settings from the Swin-T pre-training config.
_base_ = '../grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py'  # noqa
# Dataset class and root directory shared by every section below.
dataset_type = 'CocoDataset'
data_root = 'data/odinw/'
# Take the inherited test pipeline and overwrite the `meta_keys` of its
# last step so that 'caption_prompt' is listed among the packed keys.
# NOTE(review): the sections below pass either `_base_.test_pipeline` or
# `base_test_pipeline`; presumably both refer to the same (modified) list,
# so the choice makes no difference -- confirm against the config loader.
base_test_pipeline = _base_.test_pipeline
base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape',
'img_shape', 'scale_factor', 'text',
'custom_entities', 'caption_prompt')
# ---------------------1 AerialMaritimeDrone_large---------------------#
# Five categories, 'large' variant. `class_name`, `metainfo` and
# `_data_root` are re-bound in every section; each dict captures the values
# current at that point.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = dict(classes=class_name)
_data_root = data_root + 'AerialMaritimeDrone/large/'
dataset_AerialMaritimeDrone_large = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file
# (given here as a full path rather than relative to data_root).
val_evaluator_AerialMaritimeDrone_large = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------2 AerialMaritimeDrone_tiled---------------------#
# Same five categories as the 'large' variant, read from the 'tiled' folder.
class_name = ('boat', 'car', 'dock', 'jetski', 'lift')
metainfo = dict(classes=class_name)
_data_root = data_root + 'AerialMaritimeDrone/tiled/'
dataset_AerialMaritimeDrone_tiled = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_AerialMaritimeDrone_tiled = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------3 AmericanSignLanguageLetters---------------------#
# Twenty-six categories, one per upper-case letter 'A'..'Z'.
class_name = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z')
metainfo = dict(classes=class_name)
_data_root = data_root + 'AmericanSignLanguageLetters/American Sign Language Letters.v1-v1.coco/'  # noqa
dataset_AmericanSignLanguageLetters = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_AmericanSignLanguageLetters = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------4 Aquarium---------------------#
# Seven categories; no caption prompt is configured in this config.
class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish',
'stingray')
metainfo = dict(classes=class_name)
_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/'
dataset_Aquarium = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Aquarium = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------5 BCCD---------------------#
# Three categories.
class_name = ('Platelets', 'RBC', 'WBC')
metainfo = dict(classes=class_name)
_data_root = data_root + 'BCCD/BCCD.v3-raw.coco/'
dataset_BCCD = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_BCCD = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------6 boggleBoards---------------------#
# Thirty-six categories. The list contains unusual labels such as 'o '
# (with a trailing space) and 't\\' (a 't' followed by a backslash).
# NOTE(review): presumably these mirror the annotation file verbatim --
# do not "clean them up" without checking the JSON.
class_name = ('Q', 'a', 'an', 'b', 'c', 'd', 'e', 'er', 'f', 'g', 'h', 'he',
'i', 'in', 'j', 'k', 'l', 'm', 'n', 'o', 'o ', 'p', 'q', 'qu',
'r', 's', 't', 't\\', 'th', 'u', 'v', 'w', 'wild', 'x', 'y', 'z')
metainfo = dict(classes=class_name)
_data_root = data_root + 'boggleBoards/416x416AutoOrient/export/'
# Layout differs from most sections: 'val_annotations_...' sits directly
# under the data root and the image prefix is empty.
dataset_boggleBoards = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='val_annotations_without_background.json',
data_prefix=dict(img=''),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_boggleBoards = dict(
type='CocoMetric',
ann_file=_data_root + 'val_annotations_without_background.json',
metric='bbox')
# ---------------------7 brackishUnderwater---------------------#
# Six categories.
class_name = ('crab', 'fish', 'jellyfish', 'shrimp', 'small_fish', 'starfish')
metainfo = dict(classes=class_name)
_data_root = data_root + 'brackishUnderwater/960x540/'
dataset_brackishUnderwater = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_brackishUnderwater = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------8 ChessPieces---------------------#
# Thirteen entries: a single-space placeholder followed by twelve
# colour/piece names.
# NOTE(review): the ' ' entry presumably keeps category indices aligned
# with the annotation file -- confirm before removing it.
class_name = (' ', 'black bishop', 'black king', 'black knight', 'black pawn',
'black queen', 'black rook', 'white bishop', 'white king',
'white knight', 'white pawn', 'white queen', 'white rook')
metainfo = dict(classes=class_name)
_data_root = data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/'
# Reads 'new_annotations_...' rather than the 'annotations_...' file used
# by most sections.
dataset_ChessPieces = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/new_annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_ChessPieces = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/new_annotations_without_background.json',
metric='bbox')
# ---------------------9 CottontailRabbits---------------------#
# Single category named 'rabbit'.
class_name = ('rabbit', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'CottontailRabbits/'
# Reads 'new_annotations_...' rather than the 'annotations_...' file used
# by most sections.
# NOTE(review): presumably that file carries the renamed category -- confirm.
dataset_CottontailRabbits = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/new_annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_CottontailRabbits = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/new_annotations_without_background.json',
metric='bbox')
# ---------------------10 dice---------------------#
# Six categories, named by the digit strings '1'..'6'.
class_name = ('1', '2', '3', '4', '5', '6')
metainfo = dict(classes=class_name)
_data_root = data_root + 'dice/mediumColor/export/'
# Layout differs from most sections: 'val_annotations_...' sits directly
# under the data root and the image prefix is empty.
dataset_dice = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='val_annotations_without_background.json',
data_prefix=dict(img=''),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_dice = dict(
type='CocoMetric',
ann_file=_data_root + 'val_annotations_without_background.json',
metric='bbox')
# ---------------------11 DroneControl---------------------#
# Eight categories. Note the mixed separators in the labels
# ('takeoff-hand' uses a hyphen, the other '*_hand' labels an underscore).
class_name = ('follow', 'follow_hand', 'land', 'land_hand', 'null', 'object',
'takeoff', 'takeoff-hand')
metainfo = dict(classes=class_name)
_data_root = data_root + 'DroneControl/Drone Control.v3-raw.coco/'
dataset_DroneControl = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_DroneControl = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------12 EgoHands_generic---------------------#
# Single category 'hand', 'generic' variant of EgoHands.
class_name = ('hand', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'EgoHands/generic/'
# The prompt is defined but deliberately not passed to the dataset: the
# `caption_prompt=` argument below is commented out, with the scores
# recorded in the NOTE next to it.
caption_prompt = {'hand': {'suffix': ' of a person'}}
dataset_EgoHands_generic = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
# NOTE w. prompt 0.526, wo. prompt 0.608
# caption_prompt=caption_prompt,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_EgoHands_generic = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------13 EgoHands_specific---------------------#
# Four categories, 'specific' variant of EgoHands.
class_name = ('myleft', 'myright', 'yourleft', 'yourright')
metainfo = dict(classes=class_name)
_data_root = data_root + 'EgoHands/specific/'
dataset_EgoHands_specific = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_EgoHands_specific = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------14 HardHatWorkers---------------------#
# Three categories.
class_name = ('head', 'helmet', 'person')
metainfo = dict(classes=class_name)
_data_root = data_root + 'HardHatWorkers/raw/'
dataset_HardHatWorkers = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_HardHatWorkers = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------15 MaskWearing---------------------#
# Two categories.
class_name = ('mask', 'no-mask')
metainfo = dict(classes=class_name)
_data_root = data_root + 'MaskWearing/raw/'
dataset_MaskWearing = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_MaskWearing = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------16 MountainDewCommercial---------------------#
# Single category 'bottle'.
class_name = ('bottle', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'MountainDewCommercial/'
dataset_MountainDewCommercial = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_MountainDewCommercial = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------17 NorthAmericaMushrooms---------------------#
# Two categories with descriptive names.
class_name = ('flat mushroom', 'yellow mushroom')
metainfo = dict(classes=class_name)
_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/'  # noqa
# Reads 'new_annotations_...' rather than the 'annotations_...' file used
# by most sections.
# NOTE(review): presumably that file carries the renamed categories -- confirm.
dataset_NorthAmericaMushrooms = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/new_annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_NorthAmericaMushrooms = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/new_annotations_without_background.json',
metric='bbox')
# ---------------------18 openPoetryVision---------------------#
# Forty-three categories, each the name of a typeface.
class_name = ('American Typewriter', 'Andale Mono', 'Apple Chancery', 'Arial',
'Avenir', 'Baskerville', 'Big Caslon', 'Bradley Hand',
'Brush Script MT', 'Chalkboard', 'Comic Sans MS', 'Copperplate',
'Courier', 'Didot', 'Futura', 'Geneva', 'Georgia', 'Gill Sans',
'Helvetica', 'Herculanum', 'Impact', 'Kefa', 'Lucida Grande',
'Luminari', 'Marker Felt', 'Menlo', 'Monaco', 'Noteworthy',
'Optima', 'PT Sans', 'PT Serif', 'Palatino', 'Papyrus',
'Phosphate', 'Rockwell', 'SF Pro', 'SignPainter', 'Skia',
'Snell Roundhand', 'Tahoma', 'Times New Roman', 'Trebuchet MS',
'Verdana')
metainfo = dict(classes=class_name)
_data_root = data_root + 'openPoetryVision/512x512/'
dataset_openPoetryVision = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_openPoetryVision = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------19 OxfordPets_by_breed---------------------#
# Thirty-seven categories: 12 labels prefixed 'cat-' followed by 25
# prefixed 'dog-'.
class_name = ('cat-Abyssinian', 'cat-Bengal', 'cat-Birman', 'cat-Bombay',
'cat-British_Shorthair', 'cat-Egyptian_Mau', 'cat-Maine_Coon',
'cat-Persian', 'cat-Ragdoll', 'cat-Russian_Blue', 'cat-Siamese',
'cat-Sphynx', 'dog-american_bulldog',
'dog-american_pit_bull_terrier', 'dog-basset_hound',
'dog-beagle', 'dog-boxer', 'dog-chihuahua',
'dog-english_cocker_spaniel', 'dog-english_setter',
'dog-german_shorthaired', 'dog-great_pyrenees', 'dog-havanese',
'dog-japanese_chin', 'dog-keeshond', 'dog-leonberger',
'dog-miniature_pinscher', 'dog-newfoundland', 'dog-pomeranian',
'dog-pug', 'dog-saint_bernard', 'dog-samoyed',
'dog-scottish_terrier', 'dog-shiba_inu',
'dog-staffordshire_bull_terrier', 'dog-wheaten_terrier',
'dog-yorkshire_terrier')
metainfo = dict(classes=class_name)
_data_root = data_root + 'OxfordPets/by-breed/'  # noqa
dataset_OxfordPets_by_breed = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_OxfordPets_by_breed = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------20 OxfordPets_by_species---------------------#
# Two categories, read from the 'by-species' folder of OxfordPets.
class_name = ('cat', 'dog')
metainfo = dict(classes=class_name)
_data_root = data_root + 'OxfordPets/by-species/'  # noqa
dataset_OxfordPets_by_species = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_OxfordPets_by_species = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------21 PKLot---------------------#
# Two categories.
class_name = ('space-empty', 'space-occupied')
metainfo = dict(classes=class_name)
_data_root = data_root + 'PKLot/640/'  # noqa
dataset_PKLot = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_PKLot = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------22 Packages---------------------#
# Single category 'package'.
class_name = ('package', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'Packages/Raw/'
# Prompt that wraps the category name in a prefix and a suffix. Unlike the
# other prompt-carrying sections of this config, it is actually passed to
# the dataset below.
caption_prompt = {
'package': {
'prefix': 'there is a ',
'suffix': ' on the porch'
}
}
dataset_Packages = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=base_test_pipeline,
caption_prompt=caption_prompt,  # NOTE w. prompt 0.695; wo. prompt 0.687
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Packages = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------23 PascalVOC---------------------#
# Twenty categories.
class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
'tvmonitor')
metainfo = dict(classes=class_name)
_data_root = data_root + 'PascalVOC/'
dataset_PascalVOC = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_PascalVOC = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------24 pistols---------------------#
# Single category 'pistol'.
class_name = ('pistol', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'pistols/export/'
# Layout differs from most sections: 'val_annotations_...' sits directly
# under the data root and the image prefix is empty.
dataset_pistols = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='val_annotations_without_background.json',
data_prefix=dict(img=''),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_pistols = dict(
type='CocoMetric',
ann_file=_data_root + 'val_annotations_without_background.json',
metric='bbox')
# ---------------------25 plantdoc---------------------#
# Thirty categories. Both 'Soyabean leaf' and 'Soybean leaf' are listed.
# NOTE(review): presumably both spellings exist in the annotation file --
# do not merge them without checking the JSON.
class_name = ('Apple Scab Leaf', 'Apple leaf', 'Apple rust leaf',
'Bell_pepper leaf', 'Bell_pepper leaf spot', 'Blueberry leaf',
'Cherry leaf', 'Corn Gray leaf spot', 'Corn leaf blight',
'Corn rust leaf', 'Peach leaf', 'Potato leaf',
'Potato leaf early blight', 'Potato leaf late blight',
'Raspberry leaf', 'Soyabean leaf', 'Soybean leaf',
'Squash Powdery mildew leaf', 'Strawberry leaf',
'Tomato Early blight leaf', 'Tomato Septoria leaf spot',
'Tomato leaf', 'Tomato leaf bacterial spot',
'Tomato leaf late blight', 'Tomato leaf mosaic virus',
'Tomato leaf yellow virus', 'Tomato mold leaf',
'Tomato two spotted spider mites leaf', 'grape leaf',
'grape leaf black rot')
metainfo = dict(classes=class_name)
_data_root = data_root + 'plantdoc/416x416/'
dataset_plantdoc = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_plantdoc = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------26 pothole---------------------#
# Single category 'pothole'.
class_name = ('pothole', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'pothole/'
# The prompt is defined but deliberately not passed to the dataset: the
# `caption_prompt=` argument below is commented out, with the scores
# recorded in the NOTE next to it.
caption_prompt = {
'pothole': {
'name': 'holes',
'prefix': 'there are some ',
'suffix': ' on the road'
}
}
dataset_pothole = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
# NOTE w. prompt 0.137; wo. prompt 0.215
# caption_prompt=caption_prompt,
pipeline=base_test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_pothole = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------27 Raccoon---------------------#
# Single category 'raccoon'.
class_name = ('raccoon', )
metainfo = dict(classes=class_name)
_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/'
dataset_Raccoon = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_Raccoon = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------28 selfdrivingCar---------------------#
# Eleven categories, seven of which are 'trafficLight' variants.
class_name = ('biker', 'car', 'pedestrian', 'trafficLight',
'trafficLight-Green', 'trafficLight-GreenLeft',
'trafficLight-Red', 'trafficLight-RedLeft',
'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck')
metainfo = dict(classes=class_name)
_data_root = data_root + 'selfdrivingCar/fixedLarge/export/'
# Layout differs from most sections: 'val_annotations_...' sits directly
# under the data root and the image prefix is empty.
dataset_selfdrivingCar = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='val_annotations_without_background.json',
data_prefix=dict(img=''),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_selfdrivingCar = dict(
type='CocoMetric',
ann_file=_data_root + 'val_annotations_without_background.json',
metric='bbox')
# ---------------------29 ShellfishOpenImages---------------------#
# Three categories.
class_name = ('Crab', 'Lobster', 'Shrimp')
metainfo = dict(classes=class_name)
_data_root = data_root + 'ShellfishOpenImages/raw/'
dataset_ShellfishOpenImages = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_ShellfishOpenImages = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------30 ThermalCheetah---------------------#
# Two categories.
class_name = ('cheetah', 'human')
metainfo = dict(classes=class_name)
_data_root = data_root + 'ThermalCheetah/'
dataset_ThermalCheetah = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_ThermalCheetah = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------31 thermalDogsAndPeople---------------------#
# Two categories.
class_name = ('dog', 'person')
metainfo = dict(classes=class_name)
_data_root = data_root + 'thermalDogsAndPeople/'
dataset_thermalDogsAndPeople = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_thermalDogsAndPeople = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------32 UnoCards---------------------#
# Fifteen categories, named by the number strings '0'..'14'.
class_name = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
'12', '13', '14')
metainfo = dict(classes=class_name)
_data_root = data_root + 'UnoCards/raw/'
dataset_UnoCards = dict(
type=dataset_type,
metainfo=metainfo,
data_root=_data_root,
ann_file='valid/annotations_without_background.json',
data_prefix=dict(img='valid/'),
pipeline=_base_.test_pipeline,
test_mode=True,
return_classes=True)
# Bounding-box metric computed against the same annotation file.
val_evaluator_UnoCards = dict(
type='CocoMetric',
ann_file=_data_root + 'valid/annotations_without_background.json',
metric='bbox')
# ---------------------33 VehiclesOpenImages---------------------#
# Five vehicle classes; the data lives under the `416x416/` sub-directory.
class_name = (
    'Ambulance',
    'Bus',
    'Car',
    'Motorcycle',
    'Truck',
)
metainfo = dict(classes=class_name)
_data_root = f'{data_root}VehiclesOpenImages/416x416/'
# Dataset entry: annotation file and image prefix are relative to `data_root`.
dataset_VehiclesOpenImages = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_root=_data_root,
    ann_file='valid/annotations_without_background.json',
    data_prefix=dict(img='valid/'),
    pipeline=_base_.test_pipeline,
    test_mode=True,
    return_classes=True,
)
# Evaluator entry: the annotation path includes the dataset root.
val_evaluator_VehiclesOpenImages = dict(
    type='CocoMetric',
    ann_file=f'{_data_root}valid/annotations_without_background.json',
    metric='bbox',
)
# ---------------------34 WildfireSmoke---------------------#
# Single-class dataset; the trailing comma keeps `class_name` a tuple rather
# than a bare string.
class_name = ('smoke',)
metainfo = dict(classes=class_name)
_data_root = f'{data_root}WildfireSmoke/'
# Dataset entry: annotation file and image prefix are relative to `data_root`.
dataset_WildfireSmoke = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_root=_data_root,
    ann_file='valid/annotations_without_background.json',
    data_prefix=dict(img='valid/'),
    pipeline=_base_.test_pipeline,
    test_mode=True,
    return_classes=True,
)
# Evaluator entry: the annotation path includes the dataset root.
val_evaluator_WildfireSmoke = dict(
    type='CocoMetric',
    ann_file=f'{_data_root}valid/annotations_without_background.json',
    metric='bbox',
)
# ---------------------35 websiteScreenshots---------------------#
# Eight UI-element classes. This is the last dataset section, so these scratch
# variables keep the values assigned here for the rest of the file.
class_name = (
    'button',
    'field',
    'heading',
    'iframe',
    'image',
    'label',
    'link',
    'text',
)
metainfo = dict(classes=class_name)
_data_root = f'{data_root}websiteScreenshots/'
# Dataset entry: annotation file and image prefix are relative to `data_root`.
dataset_websiteScreenshots = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_root=_data_root,
    ann_file='valid/annotations_without_background.json',
    data_prefix=dict(img='valid/'),
    pipeline=_base_.test_pipeline,
    test_mode=True,
    return_classes=True,
)
# Evaluator entry: the annotation path includes the dataset root.
val_evaluator_websiteScreenshots = dict(
    type='CocoMetric',
    ann_file=f'{_data_root}valid/annotations_without_background.json',
    metric='bbox',
)
# --------------------- Config---------------------#
# One label per dataset, passed to the multi-dataset evaluator below. The order
# must stay aligned, entry for entry, with the `datasets` and `metrics` lists.
# NOTE(review): 'Raccoons' is plural while the matching variables are named
# dataset_Raccoon / val_evaluator_Raccoon -- confirm the label is intended.
dataset_prefixes = [
    'AerialMaritimeDrone_large',
    'AerialMaritimeDrone_tiled',
    'AmericanSignLanguageLetters',
    'Aquarium',
    'BCCD',
    'boggleBoards',
    'brackishUnderwater',
    'ChessPieces',
    'CottontailRabbits',
    'dice',
    'DroneControl',
    'EgoHands_generic',
    'EgoHands_specific',
    'HardHatWorkers',
    'MaskWearing',
    'MountainDewCommercial',
    'NorthAmericaMushrooms',
    'openPoetryVision',
    'OxfordPets_by_breed',
    'OxfordPets_by_species',
    'PKLot',
    'Packages',
    'PascalVOC',
    'pistols',
    'plantdoc',
    'pothole',
    'Raccoons',
    'selfdrivingCar',
    'ShellfishOpenImages',
    'ThermalCheetah',
    'thermalDogsAndPeople',
    'UnoCards',
    'VehiclesOpenImages',
    'WildfireSmoke',
    'websiteScreenshots',
]
# Dataset configs to concatenate for evaluation, listed in the same order as
# `dataset_prefixes` (35 entries).
datasets = [
    dataset_AerialMaritimeDrone_large,
    dataset_AerialMaritimeDrone_tiled,
    dataset_AmericanSignLanguageLetters,
    dataset_Aquarium,
    dataset_BCCD,
    dataset_boggleBoards,
    dataset_brackishUnderwater,
    dataset_ChessPieces,
    dataset_CottontailRabbits,
    dataset_dice,
    dataset_DroneControl,
    dataset_EgoHands_generic,
    dataset_EgoHands_specific,
    dataset_HardHatWorkers,
    dataset_MaskWearing,
    dataset_MountainDewCommercial,
    dataset_NorthAmericaMushrooms,
    dataset_openPoetryVision,
    dataset_OxfordPets_by_breed,
    dataset_OxfordPets_by_species,
    dataset_PKLot,
    dataset_Packages,
    dataset_PascalVOC,
    dataset_pistols,
    dataset_plantdoc,
    dataset_pothole,
    dataset_Raccoon,
    dataset_selfdrivingCar,
    dataset_ShellfishOpenImages,
    dataset_ThermalCheetah,
    dataset_thermalDogsAndPeople,
    dataset_UnoCards,
    dataset_VehiclesOpenImages,
    dataset_WildfireSmoke,
    dataset_websiteScreenshots,
]
# Per-dataset evaluator configs, listed in the same order as `datasets` and
# `dataset_prefixes` (35 entries).
metrics = [
    val_evaluator_AerialMaritimeDrone_large,
    val_evaluator_AerialMaritimeDrone_tiled,
    val_evaluator_AmericanSignLanguageLetters,
    val_evaluator_Aquarium,
    val_evaluator_BCCD,
    val_evaluator_boggleBoards,
    val_evaluator_brackishUnderwater,
    val_evaluator_ChessPieces,
    val_evaluator_CottontailRabbits,
    val_evaluator_dice,
    val_evaluator_DroneControl,
    val_evaluator_EgoHands_generic,
    val_evaluator_EgoHands_specific,
    val_evaluator_HardHatWorkers,
    val_evaluator_MaskWearing,
    val_evaluator_MountainDewCommercial,
    val_evaluator_NorthAmericaMushrooms,
    val_evaluator_openPoetryVision,
    val_evaluator_OxfordPets_by_breed,
    val_evaluator_OxfordPets_by_species,
    val_evaluator_PKLot,
    val_evaluator_Packages,
    val_evaluator_PascalVOC,
    val_evaluator_pistols,
    val_evaluator_plantdoc,
    val_evaluator_pothole,
    val_evaluator_Raccoon,
    val_evaluator_selfdrivingCar,
    val_evaluator_ShellfishOpenImages,
    val_evaluator_ThermalCheetah,
    val_evaluator_thermalDogsAndPeople,
    val_evaluator_UnoCards,
    val_evaluator_VehiclesOpenImages,
    val_evaluator_WildfireSmoke,
    val_evaluator_websiteScreenshots,
]
# -------------------------------------------------#
# Validation dataloader: the dataset is a ConcatDataset over `datasets`.
# `_delete_=True` is the MMEngine config marker for replacing the inherited
# dict instead of merging into it.
val_dataloader = dict(
    dataset=dict(
        _delete_=True,
        type='ConcatDataset',
        datasets=datasets,
    ),
)
# Testing reuses the very same dataloader config object.
test_dataloader = val_dataloader

# Multi-dataset evaluator wired with the per-dataset metrics and the prefix
# labels defined above; the inherited evaluator is likewise replaced.
val_evaluator = dict(
    _delete_=True,
    type='MultiDatasetsEvaluator',
    metrics=metrics,
    dataset_prefixes=dataset_prefixes,
)
# Testing reuses the very same evaluator config object.
test_evaluator = val_evaluator
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment