Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
MMPretrain-MMCV
Commits
dff2c686
Commit
dff2c686
authored
Sep 03, 2024
by
renzhc
Browse files
first commit
parent
8f9dd0ed
Pipeline
#1665
canceled with stages
Changes
1000
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1282 additions
and
0 deletions
+1282
-0
configs/_base_/datasets/imagenet_bs64_swin_384.py
configs/_base_/datasets/imagenet_bs64_swin_384.py
+54
-0
configs/_base_/datasets/imagenet_bs64_t2t_224.py
configs/_base_/datasets/imagenet_bs64_t2t_224.py
+80
-0
configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py
configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py
+59
-0
configs/_base_/datasets/inshop_bs32_448.py
configs/_base_/datasets/inshop_bs32_448.py
+64
-0
configs/_base_/datasets/nlvr2.py
configs/_base_/datasets/nlvr2.py
+86
-0
configs/_base_/datasets/nocaps.py
configs/_base_/datasets/nocaps.py
+41
-0
configs/_base_/datasets/ocrvqa.py
configs/_base_/datasets/ocrvqa.py
+81
-0
configs/_base_/datasets/pipelines/auto_aug.py
configs/_base_/datasets/pipelines/auto_aug.py
+96
-0
configs/_base_/datasets/pipelines/rand_aug.py
configs/_base_/datasets/pipelines/rand_aug.py
+43
-0
configs/_base_/datasets/refcoco.py
configs/_base_/datasets/refcoco.py
+105
-0
configs/_base_/datasets/tiny_imagenet_bs32.py
configs/_base_/datasets/tiny_imagenet_bs32.py
+51
-0
configs/_base_/datasets/tiny_imagenet_bs32_pil_resize.py
configs/_base_/datasets/tiny_imagenet_bs32_pil_resize.py
+51
-0
configs/_base_/datasets/tiny_imagenet_bs64_pil_resize_autoaug.py
.../_base_/datasets/tiny_imagenet_bs64_pil_resize_autoaug.py
+68
-0
configs/_base_/datasets/tiny_imagenet_bs64_swin_224.py
configs/_base_/datasets/tiny_imagenet_bs64_swin_224.py
+80
-0
configs/_base_/datasets/vizwiz.py
configs/_base_/datasets/vizwiz.py
+80
-0
configs/_base_/datasets/voc_bs16.py
configs/_base_/datasets/voc_bs16.py
+65
-0
configs/_base_/datasets/vsr.py
configs/_base_/datasets/vsr.py
+81
-0
configs/_base_/default_runtime.py
configs/_base_/default_runtime.py
+51
-0
configs/_base_/models/conformer/base-p16.py
configs/_base_/models/conformer/base-p16.py
+23
-0
configs/_base_/models/conformer/small-p16.py
configs/_base_/models/conformer/small-p16.py
+23
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
configs/_base_/datasets/imagenet_bs64_swin_384.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
    num_classes=1000,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=384,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/imagenet_bs64_t2t_224.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
    num_classes=1000,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Augmentations (RandAugment pad value, RandomErasing fill) operate on BGR
# images, so reverse the RGB statistics above.
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=224,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(
        type='RandAugment',
        policies='timm_increasing',
        num_policies=2,
        total_level=10,
        magnitude_level=9,
        magnitude_std=0.5,
        hparams=dict(
            pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
    dict(
        type='RandomErasing',
        erase_prob=0.25,
        mode='rand',
        min_area_ratio=0.02,
        max_area_ratio=1 / 3,
        fill_color=bgr_mean,
        fill_std=bgr_std),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeEdge',
        scale=248,
        edge='short',
        backend='pillow',
        interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/imagenet_bs8_pil_bicubic_320.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
    # RGB format normalization parameters
    mean=[122.5, 122.5, 122.5],
    std=[122.5, 122.5, 122.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=320,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeEdge',
        # keep the standard 256/224 resize-to-crop ratio at 320 crop size
        scale=int(320 / 224 * 256),
        edge='short',
        backend='pillow',
        interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=320),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=8,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=8,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        split='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/inshop_bs32_448.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'InShop'
data_preprocessor = dict(
    num_classes=3997,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=512),
    dict(type='RandomCrop', crop_size=448),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=512),
    dict(type='CenterCrop', crop_size=448),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=32,
    num_workers=4,
    dataset=dict(
        type=dataset_type,
        data_root='data/inshop',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

# Image-retrieval evaluation: the query set is searched against the gallery.
query_dataloader = dict(
    batch_size=32,
    num_workers=4,
    dataset=dict(
        type=dataset_type,
        data_root='data/inshop',
        split='query',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)

gallery_dataloader = dict(
    batch_size=32,
    num_workers=4,
    dataset=dict(
        type=dataset_type,
        data_root='data/inshop',
        split='gallery',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)

val_dataloader = query_dataloader
val_evaluator = [
    dict(type='RetrievalRecall', topk=1),
    dict(type='RetrievalAveragePrecision', topk=10),
]

test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/nlvr2.py
0 → 100644
View file @
dff2c686
# dataset settings
data_preprocessor = dict(
    type='MultiModalDataPreprocessor',
    mean=[122.770938, 116.7460125, 104.09373615],
    std=[68.5005327, 66.6321579, 70.32316305],
    to_rgb=True,
)

train_pipeline = [
    dict(
        type='ApplyToList',
        # NLVR requires to load two images in task.
        scatter_key='img_path',
        transforms=[
            dict(type='LoadImageFromFile'),
            dict(
                type='RandomResizedCrop',
                scale=384,
                interpolation='bicubic',
                backend='pillow'),
            dict(type='RandomFlip', prob=0.5, direction='horizontal'),
        ],
        collate_keys=['img', 'scale_factor', 'ori_shape'],
    ),
    dict(type='CleanCaption', keys='text'),
    dict(
        type='PackInputs',
        algorithm_keys=['text'],
        meta_keys=['image_id'],
    ),
]

test_pipeline = [
    dict(
        type='ApplyToList',
        # NLVR requires to load two images in task.
        scatter_key='img_path',
        transforms=[
            dict(type='LoadImageFromFile'),
            dict(
                type='Resize',
                scale=(384, 384),
                interpolation='bicubic',
                backend='pillow'),
        ],
        collate_keys=['img', 'scale_factor', 'ori_shape'],
    ),
    dict(
        type='PackInputs',
        algorithm_keys=['text'],
        meta_keys=['image_id'],
    ),
]

# NOTE(review): the train loader points at the `dev` annotations/images;
# presumably a placeholder — confirm whether it should use the train split.
train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='NLVR2',
        data_root='data/nlvr2',
        ann_file='dev.json',
        data_prefix='dev',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
    persistent_workers=True,
    drop_last=True,
)

val_dataloader = dict(
    batch_size=64,
    num_workers=8,
    dataset=dict(
        type='NLVR2',
        data_root='data/nlvr2',
        ann_file='dev.json',
        data_prefix='dev',
        pipeline=test_pipeline,
    ),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)
val_evaluator = dict(type='Accuracy')

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/nocaps.py
0 → 100644
View file @
dff2c686
# data settings
data_preprocessor = dict(
    type='MultiModalDataPreprocessor',
    mean=[122.770938, 116.7460125, 104.09373615],
    std=[68.5005327, 66.6321579, 70.32316305],
    to_rgb=True,
)

# NoCaps is evaluation-only, so only a test-style pipeline is defined.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize',
        scale=(384, 384),
        interpolation='bicubic',
        backend='pillow'),
    dict(type='PackInputs', meta_keys=['image_id']),
]

val_dataloader = dict(
    batch_size=16,
    num_workers=5,
    dataset=dict(
        type='NoCaps',
        data_root='data/nocaps/',
        data_prefix=dict(img_path='images/'),
        ann_file='annotations/nocaps_val_4500_captions.json',
        pipeline=test_pipeline,
    ),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)
val_evaluator = dict(
    type='NocapsSave',
    save_dir='./',
)

# # If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/ocrvqa.py
0 → 100644
View file @
dff2c686
# data settings
data_preprocessor = dict(
    mean=[122.770938, 116.7460125, 104.09373615],
    std=[68.5005327, 66.6321579, 70.32316305],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=384,
        interpolation='bicubic',
        backend='pillow'),
    dict(type='CleanCaption', keys=['question', 'gt_answer']),
    dict(
        type='PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=[],
    ),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize',
        scale=(480, 480),
        interpolation='bicubic',
        backend='pillow'),
    dict(type='CleanCaption', keys=['question', 'gt_answer']),
    dict(
        type='PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=[],
    ),
]

train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='OCRVQA',
        data_root='data/ocrvqa',
        data_prefix='images',
        ann_file='annotations/dataset.json',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
    persistent_workers=True,
    drop_last=True,
)

val_dataloader = dict(
    batch_size=64,
    num_workers=8,
    dataset=dict(
        type='OCRVQA',
        data_root='data/ocrvqa',
        data_prefix='images',
        ann_file='annotations/dataset.json',
        split='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)
val_evaluator = dict(type='VQAAcc')

test_dataloader = dict(
    batch_size=64,
    num_workers=8,
    dataset=dict(
        type='OCRVQA',
        data_root='data/ocrvqa',
        data_prefix='images',
        ann_file='annotations/dataset.json',
        split='test',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='VQAAcc')
configs/_base_/datasets/pipelines/auto_aug.py
0 → 100644
View file @
dff2c686
# Policy for ImageNet, refers to
# https://github.com/DeepVoltaire/AutoAugment/blame/master/autoaugment.py
# Each entry is a sub-policy of two transforms applied in sequence; one
# sub-policy is sampled per image. Magnitudes written as `256 / 9 * k` etc.
# map the paper's discrete level k onto the transform's value range.
policy_imagenet = [
    [
        dict(type='Posterize', bits=4, prob=0.4),
        dict(type='Rotate', angle=30., prob=0.6)
    ],
    [
        dict(type='Solarize', thr=256 / 9 * 4, prob=0.6),
        dict(type='AutoContrast', prob=0.6)
    ],
    [dict(type='Equalize', prob=0.8),
     dict(type='Equalize', prob=0.6)],
    [
        dict(type='Posterize', bits=5, prob=0.6),
        dict(type='Posterize', bits=5, prob=0.6)
    ],
    [
        dict(type='Equalize', prob=0.4),
        dict(type='Solarize', thr=256 / 9 * 5, prob=0.2)
    ],
    [
        dict(type='Equalize', prob=0.4),
        dict(type='Rotate', angle=30 / 9 * 8, prob=0.8)
    ],
    [
        dict(type='Solarize', thr=256 / 9 * 6, prob=0.6),
        dict(type='Equalize', prob=0.6)
    ],
    [dict(type='Posterize', bits=6, prob=0.8),
     dict(type='Equalize', prob=1.)],
    [
        dict(type='Rotate', angle=10., prob=0.2),
        dict(type='Solarize', thr=256 / 9, prob=0.6)
    ],
    [
        dict(type='Equalize', prob=0.6),
        dict(type='Posterize', bits=5, prob=0.4)
    ],
    [
        dict(type='Rotate', angle=30 / 9 * 8, prob=0.8),
        dict(type='ColorTransform', magnitude=0., prob=0.4)
    ],
    [
        dict(type='Rotate', angle=30., prob=0.4),
        dict(type='Equalize', prob=0.6)
    ],
    [dict(type='Equalize', prob=0.0),
     dict(type='Equalize', prob=0.8)],
    [dict(type='Invert', prob=0.6),
     dict(type='Equalize', prob=1.)],
    [
        dict(type='ColorTransform', magnitude=0.4, prob=0.6),
        dict(type='Contrast', magnitude=0.8, prob=1.)
    ],
    [
        dict(type='Rotate', angle=30 / 9 * 8, prob=0.8),
        dict(type='ColorTransform', magnitude=0.2, prob=1.)
    ],
    [
        dict(type='ColorTransform', magnitude=0.8, prob=0.8),
        dict(type='Solarize', thr=256 / 9 * 2, prob=0.8)
    ],
    [
        dict(type='Sharpness', magnitude=0.7, prob=0.4),
        dict(type='Invert', prob=0.6)
    ],
    [
        dict(
            type='Shear',
            magnitude=0.3 / 9 * 5,
            prob=0.6,
            direction='horizontal'),
        dict(type='Equalize', prob=1.)
    ],
    [
        dict(type='ColorTransform', magnitude=0., prob=0.4),
        dict(type='Equalize', prob=0.6)
    ],
    [
        dict(type='Equalize', prob=0.4),
        dict(type='Solarize', thr=256 / 9 * 5, prob=0.2)
    ],
    [
        dict(type='Solarize', thr=256 / 9 * 4, prob=0.6),
        dict(type='AutoContrast', prob=0.6)
    ],
    [dict(type='Invert', prob=0.6),
     dict(type='Equalize', prob=1.)],
    [
        dict(type='ColorTransform', magnitude=0.4, prob=0.6),
        dict(type='Contrast', magnitude=0.8, prob=1.)
    ],
    [dict(type='Equalize', prob=0.8),
     dict(type='Equalize', prob=0.6)],
]
configs/_base_/datasets/pipelines/rand_aug.py
0 → 100644
View file @
dff2c686
# Refers to `_RAND_INCREASING_TRANSFORMS` in pytorch-image-models
# Each candidate transform maps the shared RandAugment magnitude level onto
# its own value range via `magnitude_key`/`magnitude_range`; a decreasing
# range (e.g. Posterize (4, 0)) means higher level -> stronger distortion.
rand_increasing_policies = [
    dict(type='AutoContrast'),
    dict(type='Equalize'),
    dict(type='Invert'),
    dict(type='Rotate', magnitude_key='angle', magnitude_range=(0, 30)),
    dict(type='Posterize', magnitude_key='bits', magnitude_range=(4, 0)),
    dict(type='Solarize', magnitude_key='thr', magnitude_range=(256, 0)),
    dict(
        type='SolarizeAdd',
        magnitude_key='magnitude',
        magnitude_range=(0, 110)),
    dict(
        type='ColorTransform',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.9)),
    dict(
        type='Contrast',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.9)),
    dict(
        type='Brightness',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.9)),
    dict(
        type='Sharpness',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.9)),
    dict(
        type='Shear',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.3),
        direction='horizontal'),
    dict(
        type='Shear',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.3),
        direction='vertical'),
    dict(
        type='Translate',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.45),
        direction='horizontal'),
    dict(
        type='Translate',
        magnitude_key='magnitude',
        magnitude_range=(0, 0.45),
        direction='vertical')
]
configs/_base_/datasets/refcoco.py
0 → 100644
View file @
dff2c686
# data settings
data_preprocessor = dict(
    mean=[122.770938, 116.7460125, 104.09373615],
    std=[68.5005327, 66.6321579, 70.32316305],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomApply',
        transforms=[
            dict(
                type='ColorJitter',
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
                hue=0.1,
                backend='cv2')
        ],
        prob=0.5),
    dict(
        type='mmdet.RandomCrop',
        crop_type='relative_range',
        crop_size=(0.8, 0.8),
        allow_negative_crop=False),
    dict(
        type='RandomChoiceResize',
        scales=[(384, 384), (360, 360), (344, 344), (312, 312), (300, 300),
                (286, 286), (270, 270)],
        keep_ratio=False),
    dict(
        type='RandomTranslatePad',
        size=384,
        aug_translate=True,
    ),
    dict(type='CleanCaption', keys='text'),
    dict(
        type='PackInputs',
        algorithm_keys=['text', 'gt_bboxes', 'scale_factor'],
        meta_keys=['image_id'],
    ),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize',
        scale=(384, 384),
        interpolation='bicubic',
        backend='pillow'),
    dict(type='CleanCaption', keys='text'),
    dict(
        type='PackInputs',
        algorithm_keys=['text', 'gt_bboxes', 'scale_factor'],
        meta_keys=['image_id'],
    ),
]

train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='RefCOCO',
        data_root='data/coco',
        data_prefix='train2014',
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
    drop_last=True,
)

val_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='RefCOCO',
        data_root='data/coco',
        data_prefix='train2014',
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='VisualGroundingMetric')

test_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='RefCOCO',
        data_root='data/coco',
        data_prefix='train2014',
        ann_file='refcoco/instances.json',
        split_file='refcoco/refs(unc).p',
        split='testA',  # or 'testB'
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = val_evaluator
configs/_base_/datasets/tiny_imagenet_bs32.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
    num_classes=200,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ResizeEdge', scale=256, edge='short'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=32,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=32,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/tiny_imagenet_bs32_pil_resize.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
    num_classes=200,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224, backend='pillow'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ResizeEdge', scale=256, edge='short', backend='pillow'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=32,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=32,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/tiny_imagenet_bs64_pil_resize_autoaug.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
    num_classes=200,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

# AutoAugment pad values are applied to BGR images, so reverse the RGB stats.
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=224,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(
        type='AutoAugment',
        policies='imagenet',
        hparams=dict(
            pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeEdge',
        scale=256,
        edge='short',
        backend='pillow',
        interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/tiny_imagenet_bs64_swin_224.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'CustomDataset'
data_preprocessor = dict(
    num_classes=200,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Augmentations (RandAugment pad value, RandomErasing fill) operate on BGR
# images, so reverse the RGB statistics above.
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=224,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(
        type='RandAugment',
        policies='timm_increasing',
        num_policies=2,
        total_level=10,
        magnitude_level=9,
        magnitude_std=0.5,
        hparams=dict(
            pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
    dict(
        type='RandomErasing',
        erase_prob=0.25,
        mode='rand',
        min_area_ratio=0.02,
        max_area_ratio=1 / 3,
        fill_color=bgr_mean,
        fill_std=bgr_std),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeEdge',
        scale=256,
        edge='short',
        backend='pillow',
        interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        data_prefix='val',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/vizwiz.py
0 → 100644
View file @
dff2c686
# data settings
data_preprocessor = dict(
    mean=[122.770938, 116.7460125, 104.09373615],
    std=[68.5005327, 66.6321579, 70.32316305],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=384,
        interpolation='bicubic',
        backend='pillow'),
    dict(
        type='PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    ),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize',
        scale=(480, 480),
        interpolation='bicubic',
        backend='pillow'),
    dict(
        type='CleanCaption',
        keys=['question'],
    ),
    dict(
        type='PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    ),
]

train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VizWiz',
        data_root='data/vizwiz/Images',
        data_prefix='',
        ann_file='Annotations/train.json',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
    persistent_workers=True,
    drop_last=True,
)

val_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VizWiz',
        data_root='data/vizwiz/Images',
        data_prefix='',
        ann_file='Annotations/val.json',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)
val_evaluator = dict(type='VizWizAcc')

# The test split has no public answers, so results are dumped to a report
# file for submission instead of being scored locally.
test_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VizWiz',
        data_root='data/vizwiz/Images',
        data_prefix='',
        ann_file='Annotations/test.json',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='ReportVQA', file_path='vqa_test.json')
configs/_base_/datasets/voc_bs16.py
0 → 100644
View file @
dff2c686
# dataset settings
dataset_type = 'VOC'
data_preprocessor = dict(
    num_classes=20,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
    # generate onehot-format labels for multi-label classification.
    to_onehot=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ResizeEdge', scale=256, edge='short'),
    dict(type='CenterCrop', crop_size=224),
    dict(
        type='PackInputs',
        # `gt_label_difficult` is needed for VOC evaluation
        meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'flip', 'flip_direction',
                   'gt_label_difficult')),
]

train_dataloader = dict(
    batch_size=16,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/VOC2007',
        split='trainval',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),
)

val_dataloader = dict(
    batch_size=16,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/VOC2007',
        split='test',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
)

# calculate precision_recall_f1 and mAP
val_evaluator = [
    dict(type='VOCMultiLabelMetric'),
    dict(type='VOCMultiLabelMetric', average='micro'),
    dict(type='VOCAveragePrecision')
]

# The original config assigned `test_dataloader = val_dataloader` twice
# (once before and once after `val_evaluator`); the redundant first
# assignment has been removed.
test_dataloader = val_dataloader
test_evaluator = val_evaluator
configs/_base_/datasets/vsr.py
0 → 100644
View file @
dff2c686
# data settings
data_preprocessor
=
dict
(
mean
=
[
122.770938
,
116.7460125
,
104.09373615
],
std
=
[
68.5005327
,
66.6321579
,
70.32316305
],
to_rgb
=
True
,
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'RandomResizedCrop'
,
scale
=
384
,
interpolation
=
'bicubic'
,
backend
=
'pillow'
),
dict
(
type
=
'PackInputs'
,
algorithm_keys
=
[
'question'
,
'gt_answer'
,
'gt_answer_weight'
],
meta_keys
=
[
'question_id'
,
'image_id'
],
),
]
# Test pipeline: deterministic 480x480 resize, clean the question text, pack.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize',
        scale=(480, 480),
        interpolation='bicubic',
        backend='pillow'),
    dict(
        type='CleanCaption',
        keys=['question'],
    ),
    dict(
        type='PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    ),
]
# Training dataloader for the VSR dataset.
train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VSR',
        data_root='data/coco',
        data_prefix='',
        ann_file='annotations/train.json',
        # FIX: the original config passed `test_pipeline` here, which left
        # `train_pipeline` defined but unused — use the training pipeline.
        pipeline=train_pipeline),
    # FIX: training data should be shuffled; `shuffle=False` is a
    # test-time setting (the original also had it False here).
    sampler=dict(type='DefaultSampler', shuffle=True),
    persistent_workers=True,
    drop_last=True,
)
# Validation dataloader for the VSR dataset (deterministic order).
val_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VSR',
        data_root='data/coco',
        data_prefix='',
        ann_file='annotations/val.json',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)
# Accuracy metric for VSR evaluation.
val_evaluator = dict(type='VSRAcc')
# Test dataloader mirrors validation but reads the test annotation file.
test_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(
        type='VSR',
        data_root='data/coco',
        data_prefix='',
        ann_file='annotations/test.json',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)

# Reuse the validation metric for testing.
test_evaluator = val_evaluator
configs/_base_/default_runtime.py
0 → 100644
View file @
dff2c686
# defaults to use registries in mmpretrain
default_scope = 'mmpretrain'

# configure default hooks
default_hooks = dict(
    # record the time of every iteration.
    timer=dict(type='IterTimerHook'),
    # print log every 100 iterations.
    logger=dict(type='LoggerHook', interval=100),
    # enable the parameter scheduler.
    param_scheduler=dict(type='ParamSchedulerHook'),
    # save checkpoint per epoch.
    checkpoint=dict(type='CheckpointHook', interval=1),
    # set sampler seed in distributed environment.
    sampler_seed=dict(type='DistSamplerSeedHook'),
    # validation results visualization, set True to enable it.
    visualization=dict(type='VisualizationHook', enable=False),
)
# configure environment
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,
    # set multi process parameters
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    # set distributed parameters
    dist_cfg=dict(backend='nccl'),
)
# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='UniversalVisualizer', vis_backends=vis_backends)

# set log level
log_level = 'INFO'

# load from which checkpoint
load_from = None

# whether to resume training from the loaded checkpoint
resume = False

# Defaults to use random seed and disable `deterministic`
randomness = dict(seed=None, deterministic=False)
configs/_base_/models/conformer/base-p16.py
0 → 100644
View file @
dff2c686
# model settings
# Conformer-base classifier: dual-branch backbone feeding a ConformerHead
# with label smoothing; Mixup/CutMix batch augments during training.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='Conformer',
        arch='base',
        drop_path_rate=0.1,
        init_cfg=None,
    ),
    neck=None,
    head=dict(
        type='ConformerHead',
        num_classes=1000,
        # [conv-branch channels, transformer-branch channels]
        in_channels=[1536, 576],
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
        dict(type='Constant', layer='LayerNorm', val=1., bias=0.),
    ],
    train_cfg=dict(augments=[
        dict(type='Mixup', alpha=0.8),
        dict(type='CutMix', alpha=1.0),
    ]),
)
configs/_base_/models/conformer/small-p16.py
0 → 100644
View file @
dff2c686
# model settings
# Conformer-small classifier: same layout as the base variant but with the
# 'small' arch and correspondingly smaller head input channels.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='Conformer',
        arch='small',
        drop_path_rate=0.1,
        init_cfg=None,
    ),
    neck=None,
    head=dict(
        type='ConformerHead',
        num_classes=1000,
        # [conv-branch channels, transformer-branch channels]
        in_channels=[1024, 384],
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
        dict(type='Constant', layer='LayerNorm', val=1., bias=0.),
    ],
    train_cfg=dict(augments=[
        dict(type='Mixup', alpha=0.8),
        dict(type='CutMix', alpha=1.0),
    ]),
)
Prev
1
2
3
4
5
6
7
8
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment