Commit dff2c686 authored by renzhc's avatar renzhc
Browse files

first commit

parent 8f9dd0ed
Pipeline #1665 canceled with stages
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=248,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[122.5, 122.5, 122.5],
std=[122.5, 122.5, 122.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=320,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=int(320 / 224 * 256),
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=320),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'InShop'
data_preprocessor = dict(
num_classes=3997,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=512),
dict(type='RandomCrop', crop_size=448),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=512),
dict(type='CenterCrop', crop_size=448),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=32,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/inshop',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
query_dataloader = dict(
batch_size=32,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/inshop',
split='query',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
gallery_dataloader = dict(
batch_size=32,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/inshop',
split='gallery',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_dataloader = query_dataloader
val_evaluator = [
dict(type='RetrievalRecall', topk=1),
dict(type='RetrievalAveragePrecision', topk=10),
]
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
train_pipeline = [
dict(
type='ApplyToList',
# NLVR requires to load two images in task.
scatter_key='img_path',
transforms=[
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
interpolation='bicubic',
backend='pillow'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
],
collate_keys=['img', 'scale_factor', 'ori_shape'],
),
dict(type='CleanCaption', keys='text'),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id'],
),
]
test_pipeline = [
dict(
type='ApplyToList',
# NLVR requires to load two images in task.
scatter_key='img_path',
transforms=[
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(384, 384),
interpolation='bicubic',
backend='pillow'),
],
collate_keys=['img', 'scale_factor', 'ori_shape'],
),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id'],
),
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='NLVR2',
data_root='data/nlvr2',
ann_file='dev.json',
data_prefix='dev',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
drop_last=True,
)
val_dataloader = dict(
batch_size=64,
num_workers=8,
dataset=dict(
type='NLVR2',
data_root='data/nlvr2',
ann_file='dev.json',
data_prefix='dev',
pipeline=test_pipeline,
),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy')
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# data settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(384, 384),
interpolation='bicubic',
backend='pillow'),
dict(type='PackInputs', meta_keys=['image_id']),
]
val_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type='NoCaps',
data_root='data/nocaps/',
data_prefix=dict(img_path='images/'),
ann_file='annotations/nocaps_val_4500_captions.json',
pipeline=test_pipeline,
),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(
type='NocapsSave',
save_dir='./',
)
# # If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# data settings
data_preprocessor = dict(
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
interpolation='bicubic',
backend='pillow'),
dict(type='CleanCaption', keys=['question', 'gt_answer']),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=[],
),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(480, 480),
interpolation='bicubic',
backend='pillow'),
dict(type='CleanCaption', keys=['question', 'gt_answer']),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=[],
),
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='OCRVQA',
data_root='data/ocrvqa',
data_prefix='images',
ann_file='annotations/dataset.json',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
drop_last=True,
)
val_dataloader = dict(
batch_size=64,
num_workers=8,
dataset=dict(
type='OCRVQA',
data_root='data/ocrvqa',
data_prefix='images',
ann_file='annotations/dataset.json',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='VQAAcc')
test_dataloader = dict(
batch_size=64,
num_workers=8,
dataset=dict(
type='OCRVQA',
data_root='data/ocrvqa',
data_prefix='images',
ann_file='annotations/dataset.json',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='VQAAcc')
# Policy for ImageNet, refers to
# https://github.com/DeepVoltaire/AutoAugment/blame/master/autoaugment.py
policy_imagenet = [
[
dict(type='Posterize', bits=4, prob=0.4),
dict(type='Rotate', angle=30., prob=0.6)
],
[
dict(type='Solarize', thr=256 / 9 * 4, prob=0.6),
dict(type='AutoContrast', prob=0.6)
],
[dict(type='Equalize', prob=0.8),
dict(type='Equalize', prob=0.6)],
[
dict(type='Posterize', bits=5, prob=0.6),
dict(type='Posterize', bits=5, prob=0.6)
],
[
dict(type='Equalize', prob=0.4),
dict(type='Solarize', thr=256 / 9 * 5, prob=0.2)
],
[
dict(type='Equalize', prob=0.4),
dict(type='Rotate', angle=30 / 9 * 8, prob=0.8)
],
[
dict(type='Solarize', thr=256 / 9 * 6, prob=0.6),
dict(type='Equalize', prob=0.6)
],
[dict(type='Posterize', bits=6, prob=0.8),
dict(type='Equalize', prob=1.)],
[
dict(type='Rotate', angle=10., prob=0.2),
dict(type='Solarize', thr=256 / 9, prob=0.6)
],
[
dict(type='Equalize', prob=0.6),
dict(type='Posterize', bits=5, prob=0.4)
],
[
dict(type='Rotate', angle=30 / 9 * 8, prob=0.8),
dict(type='ColorTransform', magnitude=0., prob=0.4)
],
[
dict(type='Rotate', angle=30., prob=0.4),
dict(type='Equalize', prob=0.6)
],
[dict(type='Equalize', prob=0.0),
dict(type='Equalize', prob=0.8)],
[dict(type='Invert', prob=0.6),
dict(type='Equalize', prob=1.)],
[
dict(type='ColorTransform', magnitude=0.4, prob=0.6),
dict(type='Contrast', magnitude=0.8, prob=1.)
],
[
dict(type='Rotate', angle=30 / 9 * 8, prob=0.8),
dict(type='ColorTransform', magnitude=0.2, prob=1.)
],
[
dict(type='ColorTransform', magnitude=0.8, prob=0.8),
dict(type='Solarize', thr=256 / 9 * 2, prob=0.8)
],
[
dict(type='Sharpness', magnitude=0.7, prob=0.4),
dict(type='Invert', prob=0.6)
],
[
dict(
type='Shear',
magnitude=0.3 / 9 * 5,
prob=0.6,
direction='horizontal'),
dict(type='Equalize', prob=1.)
],
[
dict(type='ColorTransform', magnitude=0., prob=0.4),
dict(type='Equalize', prob=0.6)
],
[
dict(type='Equalize', prob=0.4),
dict(type='Solarize', thr=256 / 9 * 5, prob=0.2)
],
[
dict(type='Solarize', thr=256 / 9 * 4, prob=0.6),
dict(type='AutoContrast', prob=0.6)
],
[dict(type='Invert', prob=0.6),
dict(type='Equalize', prob=1.)],
[
dict(type='ColorTransform', magnitude=0.4, prob=0.6),
dict(type='Contrast', magnitude=0.8, prob=1.)
],
[dict(type='Equalize', prob=0.8),
dict(type='Equalize', prob=0.6)],
]
# Refers to `_RAND_INCREASING_TRANSFORMS` in pytorch-image-models
rand_increasing_policies = [
dict(type='AutoContrast'),
dict(type='Equalize'),
dict(type='Invert'),
dict(type='Rotate', magnitude_key='angle', magnitude_range=(0, 30)),
dict(type='Posterize', magnitude_key='bits', magnitude_range=(4, 0)),
dict(type='Solarize', magnitude_key='thr', magnitude_range=(256, 0)),
dict(
type='SolarizeAdd',
magnitude_key='magnitude',
magnitude_range=(0, 110)),
dict(
type='ColorTransform',
magnitude_key='magnitude',
magnitude_range=(0, 0.9)),
dict(type='Contrast', magnitude_key='magnitude', magnitude_range=(0, 0.9)),
dict(
type='Brightness', magnitude_key='magnitude',
magnitude_range=(0, 0.9)),
dict(
type='Sharpness', magnitude_key='magnitude', magnitude_range=(0, 0.9)),
dict(
type='Shear',
magnitude_key='magnitude',
magnitude_range=(0, 0.3),
direction='horizontal'),
dict(
type='Shear',
magnitude_key='magnitude',
magnitude_range=(0, 0.3),
direction='vertical'),
dict(
type='Translate',
magnitude_key='magnitude',
magnitude_range=(0, 0.45),
direction='horizontal'),
dict(
type='Translate',
magnitude_key='magnitude',
magnitude_range=(0, 0.45),
direction='vertical')
]
# data settings
data_preprocessor = dict(
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomApply',
transforms=[
dict(
type='ColorJitter',
brightness=0.4,
contrast=0.4,
saturation=0.4,
hue=0.1,
backend='cv2')
],
prob=0.5),
dict(
type='mmdet.RandomCrop',
crop_type='relative_range',
crop_size=(0.8, 0.8),
allow_negative_crop=False),
dict(
type='RandomChoiceResize',
scales=[(384, 384), (360, 360), (344, 344), (312, 312), (300, 300),
(286, 286), (270, 270)],
keep_ratio=False),
dict(
type='RandomTranslatePad',
size=384,
aug_translate=True,
),
dict(type='CleanCaption', keys='text'),
dict(
type='PackInputs',
algorithm_keys=['text', 'gt_bboxes', 'scale_factor'],
meta_keys=['image_id'],
),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(384, 384),
interpolation='bicubic',
backend='pillow'),
dict(type='CleanCaption', keys='text'),
dict(
type='PackInputs',
algorithm_keys=['text', 'gt_bboxes', 'scale_factor'],
meta_keys=['image_id'],
),
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='RefCOCO',
data_root='data/coco',
data_prefix='train2014',
ann_file='refcoco/instances.json',
split_file='refcoco/refs(unc).p',
split='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
drop_last=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='RefCOCO',
data_root='data/coco',
data_prefix='train2014',
ann_file='refcoco/instances.json',
split_file='refcoco/refs(unc).p',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='VisualGroundingMetric')
test_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='RefCOCO',
data_root='data/coco',
data_prefix='train2014',
ann_file='refcoco/instances.json',
split_file='refcoco/refs(unc).p',
split='testA', # or 'testB'
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
num_classes=200,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=224),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='ResizeEdge', scale=256, edge='short'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=32,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=32,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
num_classes=200,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=224, backend='pillow'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=32,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=32,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
num_classes=200,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='AutoAugment',
policies='imagenet',
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
num_classes=200,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# data settings
data_preprocessor = dict(
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
interpolation='bicubic',
backend='pillow'),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=['question_id', 'image_id'],
),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(480, 480),
interpolation='bicubic',
backend='pillow'),
dict(
type='CleanCaption',
keys=['question'],
),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=['question_id', 'image_id'],
),
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VizWiz',
data_root='data/vizwiz/Images',
data_prefix='',
ann_file='Annotations/train.json',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
drop_last=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VizWiz',
data_root='data/vizwiz/Images',
data_prefix='',
ann_file='Annotations/val.json',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='VizWizAcc')
test_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VizWiz',
data_root='data/vizwiz/Images',
data_prefix='',
ann_file='Annotations/test.json',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='ReportVQA', file_path='vqa_test.json')
# dataset settings
dataset_type = 'VOC'
data_preprocessor = dict(
num_classes=20,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
# generate onehot-format labels for multi-label classification.
to_onehot=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=224),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='ResizeEdge', scale=256, edge='short'),
dict(type='CenterCrop', crop_size=224),
dict(
type='PackInputs',
# `gt_label_difficult` is needed for VOC evaluation
meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction',
'gt_label_difficult')),
]
train_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/VOC2007',
split='trainval',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
)
val_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/VOC2007',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_dataloader = val_dataloader
# calculate precision_recall_f1 and mAP
val_evaluator = [
dict(type='VOCMultiLabelMetric'),
dict(type='VOCMultiLabelMetric', average='micro'),
dict(type='VOCAveragePrecision')
]
test_dataloader = val_dataloader
test_evaluator = val_evaluator
# data settings
data_preprocessor = dict(
mean=[122.770938, 116.7460125, 104.09373615],
std=[68.5005327, 66.6321579, 70.32316305],
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
interpolation='bicubic',
backend='pillow'),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=['question_id', 'image_id'],
),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='Resize',
scale=(480, 480),
interpolation='bicubic',
backend='pillow'),
dict(
type='CleanCaption',
keys=['question'],
),
dict(
type='PackInputs',
algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
meta_keys=['question_id', 'image_id'],
),
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VSR',
data_root='data/coco',
data_prefix='',
ann_file='annotations/train.json',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
drop_last=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VSR',
data_root='data/coco',
data_prefix='',
ann_file='annotations/val.json',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='VSRAcc')
test_dataloader = dict(
batch_size=16,
num_workers=8,
dataset=dict(
type='VSR',
data_root='data/coco',
data_prefix='',
ann_file='annotations/test.json',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
test_evaluator = val_evaluator
# defaults to use registries in mmpretrain
default_scope = 'mmpretrain'
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type='IterTimerHook'),
# print log every 100 iterations.
logger=dict(type='LoggerHook', interval=100),
# enable the parameter scheduler.
param_scheduler=dict(type='ParamSchedulerHook'),
# save checkpoint per epoch.
checkpoint=dict(type='CheckpointHook', interval=1),
# set sampler seed in distributed evrionment.
sampler_seed=dict(type='DistSamplerSeedHook'),
# validation results visualization, set True to enable it.
visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='UniversalVisualizer', vis_backends=vis_backends)
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# Defaults to use random seed and disable `deterministic`
randomness = dict(seed=None, deterministic=False)
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='Conformer', arch='base', drop_path_rate=0.1, init_cfg=None),
neck=None,
head=dict(
type='ConformerHead',
num_classes=1000,
in_channels=[1536, 576],
init_cfg=None,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0)
]),
)
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='Conformer', arch='small', drop_path_rate=0.1, init_cfg=None),
neck=None,
head=dict(
type='ConformerHead',
num_classes=1000,
in_channels=[1024, 384],
init_cfg=None,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0)
]),
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment