Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmpretrain
Commits
cbc25585
Commit
cbc25585
authored
Jun 24, 2025
by
limm
Browse files
add mmpretrain/ part
parent
1baf0566
Pipeline
#2801
canceled with stages
Changes
268
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
945 additions
and
0 deletions
+945
-0
mmpretrain/configs/swin_transformer_v2/swinv2_large_w12_8xb128_in21k_192px.py
...win_transformer_v2/swinv2_large_w12_8xb128_in21k_192px.py
+32
-0
mmpretrain/configs/swin_transformer_v2/swinv2_large_w16_in21k_pre_16xb64_in1k_256px.py
...former_v2/swinv2_large_w16_in21k_pre_16xb64_in1k_256px.py
+24
-0
mmpretrain/configs/swin_transformer_v2/swinv2_large_w24_in21k_pre_16xb64_in1k_384px.py
...former_v2/swinv2_large_w24_in21k_pre_16xb64_in1k_384px.py
+24
-0
mmpretrain/configs/swin_transformer_v2/swinv2_small_w16_16xb64_in1k_256px.py
...swin_transformer_v2/swinv2_small_w16_16xb64_in1k_256px.py
+28
-0
mmpretrain/configs/swin_transformer_v2/swinv2_small_w8_16xb64_in1k_256px.py
.../swin_transformer_v2/swinv2_small_w8_16xb64_in1k_256px.py
+24
-0
mmpretrain/configs/swin_transformer_v2/swinv2_tiny_w16_16xb64_in1k_256px.py
.../swin_transformer_v2/swinv2_tiny_w16_16xb64_in1k_256px.py
+28
-0
mmpretrain/configs/swin_transformer_v2/swinv2_tiny_w8_16xb64_in1k_256px.py
...s/swin_transformer_v2/swinv2_tiny_w8_16xb64_in1k_256px.py
+24
-0
mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py
...nfigs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py
+52
-0
mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py
...in/configs/vision_transformer/vit_base_p16_64xb64_in1k.py
+20
-0
mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py
...figs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py
+44
-0
mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py
...in/configs/vision_transformer/vit_base_p32_64xb64_in1k.py
+26
-0
mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py
...figs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py
+48
-0
mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py
...n/configs/vision_transformer/vit_large_p16_64xb64_in1k.py
+27
-0
mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py
...igs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py
+49
-0
mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py
...n/configs/vision_transformer/vit_large_p32_64xb64_in1k.py
+27
-0
mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py
...igs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py
+49
-0
mmpretrain/datasets/__init__.py
mmpretrain/datasets/__init__.py
+62
-0
mmpretrain/datasets/base_dataset.py
mmpretrain/datasets/base_dataset.py
+219
-0
mmpretrain/datasets/builder.py
mmpretrain/datasets/builder.py
+25
-0
mmpretrain/datasets/caltech101.py
mmpretrain/datasets/caltech101.py
+113
-0
No files found.
Too many changes to show.
To preserve performance only
268 of 268+
files are displayed.
Plain diff
Email patch
mmpretrain/configs/swin_transformer_v2/swinv2_large_w12_8xb128_in21k_192px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
# SwinV2-large, 12x12 windows, 8 GPUs x 128 batch, ImageNet-21k at 192px
# (per filename) — TODO confirm against model zoo.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit

from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet21k_bs128 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
model.update(
    backbone=dict(
        img_size=192,
        drop_path_rate=0.5,
        # per-stage window sizes; the last stage uses a smaller window
        window_size=[12, 12, 12, 6]),
    # 21841 = number of ImageNet-21k categories
    head=dict(num_classes=21841),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=0.02, bias=0.),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.)
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))

# dataset settings
data_preprocessor = dict(num_classes=21841)

# Patch the inherited pipelines for the 192px input resolution.
_base_['train_pipeline'][1]['scale'] = 192  # RandomResizedCrop
_base_['test_pipeline'][1]['scale'] = 219  # ResizeEdge
_base_['test_pipeline'][2]['crop_size'] = 192  # CenterCrop
mmpretrain/configs/swin_transformer_v2/swinv2_large_w16_in21k_pre_16xb64_in1k_256px.py
0 → 100644
View file @
cbc25585
# Only for evaluation
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import CrossEntropyLoss

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_256 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
model.update(
    backbone=dict(
        arch='large',
        img_size=256,
        # per-stage window sizes for the 256px fine-tuning resolution
        window_size=[16, 16, 16, 8],
        # window sizes used during pre-training (12px windows per filename)
        # — TODO confirm against the released checkpoint
        pretrained_window_sizes=[12, 12, 12, 6]),
    head=dict(
        in_channels=1536,
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5)))
mmpretrain/configs/swin_transformer_v2/swinv2_large_w24_in21k_pre_16xb64_in1k_384px.py
0 → 100644
View file @
cbc25585
# Only for evaluation
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import CrossEntropyLoss

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_384 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
model.update(
    backbone=dict(
        arch='large',
        img_size=384,
        # per-stage window sizes for the 384px fine-tuning resolution
        window_size=[24, 24, 24, 12],
        # window sizes used during pre-training — TODO confirm against
        # the released checkpoint
        pretrained_window_sizes=[12, 12, 12, 6]),
    head=dict(
        in_channels=1536,
        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
        topk=(1, 5)))
mmpretrain/configs/swin_transformer_v2/swinv2_small_w16_16xb64_in1k_256px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit

from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_256 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
model.update(
    backbone=dict(
        arch='small',
        img_size=256,
        drop_path_rate=0.3,
        # per-stage window sizes; the last stage uses a smaller window
        window_size=[16, 16, 16, 8]),
    head=dict(in_channels=768),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=0.02, bias=0.),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.)
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))
mmpretrain/configs/swin_transformer_v2/swinv2_small_w8_16xb64_in1k_256px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit

from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_256 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
# NOTE: no `window_size` override here — the 8px window variant relies on
# the base model's default window size (per filename; confirm in the base
# config).
model.update(
    backbone=dict(
        arch='small',
        img_size=256,
        drop_path_rate=0.3),
    head=dict(in_channels=768),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=0.02, bias=0.),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.)
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))
mmpretrain/configs/swin_transformer_v2/swinv2_tiny_w16_16xb64_in1k_256px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit

from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_256 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
model.update(
    backbone=dict(
        arch='tiny',
        img_size=256,
        drop_path_rate=0.2,
        # per-stage window sizes; the last stage uses a smaller window
        window_size=[16, 16, 16, 8]),
    head=dict(in_channels=768),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=0.02, bias=0.),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.)
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))
mmpretrain/configs/swin_transformer_v2/swinv2_tiny_w8_16xb64_in1k_256px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit

from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_256 import *
    from .._base_.default_runtime import *
    from .._base_.models.swin_transformer_v2_base import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

# model settings
# NOTE: no `window_size` override here — the 8px window variant relies on
# the base model's default window size (per filename; confirm in the base
# config).
model.update(
    backbone=dict(
        arch='tiny',
        img_size=256,
        drop_path_rate=0.2),
    head=dict(in_channels=768),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=0.02, bias=0.),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.)
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))
mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
# ViT-base/16 fine-tuning on ImageNet-1k from MAE pre-training
# (per filename) — 32 GPUs x 128 samples per GPU.
from mmengine.config import read_base
from mmengine.model import ConstantInit, TruncNormalInit
from torch.optim import AdamW

from mmpretrain.engine import EMAHook
from mmpretrain.models import CutMix, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_swin_224 import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs1024_adamw_swin import *

model.update(
    # init_cfg=None disables the base model's backbone init (weights are
    # expected to come from the MAE checkpoint — TODO confirm loading path).
    backbone=dict(drop_rate=0, drop_path_rate=0.1, init_cfg=None),
    head=dict(loss=dict(mode='original')),
    init_cfg=[
        dict(type=TruncNormalInit, layer='Linear', std=.02),
        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
    ],
    train_cfg=dict(augments=[
        dict(type=Mixup, alpha=0.8),
        dict(type=CutMix, alpha=1.0)
    ]))

# dataset settings
train_dataloader.update(batch_size=128)

# schedule settings
optim_wrapper.update(
    optimizer=dict(
        type=AdamW,
        # linear LR scaling rule: base lr 1e-4 at batch 256, scaled to 4096
        lr=1e-4 * 4096 / 256,
        weight_decay=0.3,
        eps=1e-8,
        betas=(0.9, 0.95)),
    paramwise_cfg=dict(
        # no weight decay on norms, biases, cls token or position embedding
        norm_decay_mult=0.0,
        bias_decay_mult=0.0,
        custom_keys={
            '.cls_token': dict(decay_mult=0.0),
            '.pos_embed': dict(decay_mult=0.0)
        }))

# runtime settings
custom_hooks = [dict(type=EMAHook, momentum=1e-4)]

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr.update(base_batch_size=4096)
mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    head=dict(hidden_dim=3072),
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(backbone=dict(img_size=384))

# dataset setting
data_preprocessor.update(
    # normalize to [-1, 1]: (x - 127.5) / 127.5 per channel
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Replace the inherited pipelines so training/testing run at 384px.
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

train_dataloader.update(dataset=dict(pipeline=train_pipeline))
val_dataloader.update(dataset=dict(pipeline=test_pipeline))
test_dataloader.update(dataset=dict(pipeline=test_pipeline))

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import CrossEntropyLoss, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # derived from the p16 base model; only the patch size changes
    backbone=dict(patch_size=32),
    head=dict(
        hidden_dim=3072,
        topk=(1, 5),
    ),
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# override the head loss after the update so it fully replaces the base one
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # derived from the p16 base model; change input size and patch size
    backbone=dict(img_size=384, patch_size=32),
    head=dict(topk=(1, 5)))

# override the head loss after the update so it fully replaces the base one
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# dataset setting
data_preprocessor.update(
    # normalize to [-1, 1]: (x - 127.5) / 127.5 per channel
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Replace the inherited pipelines so training/testing run at 384px.
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

train_dataloader.update(dataset=dict(pipeline=train_pipeline))
val_dataloader.update(dataset=dict(pipeline=test_pipeline))
test_dataloader.update(dataset=dict(pipeline=test_pipeline))

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import CrossEntropyLoss, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # switch the base p16 model to the 'l' (large) architecture
    backbone=dict(arch='l'),
    head=dict(
        hidden_dim=3072,
        # ViT-large embedding width
        in_channels=1024,
        topk=(1, 5),
    ),
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# override the head loss after the update so it fully replaces the base one
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # switch the base p16 model to the 'l' (large) arch at 384px input
    backbone=dict(arch='l', img_size=384),
    head=dict(in_channels=1024, topk=(1, 5)))

# override the head loss after the update so it fully replaces the base one
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# dataset setting
data_preprocessor.update(
    # normalize to [-1, 1]: (x - 127.5) / 127.5 per channel
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Replace the inherited pipelines so training/testing run at 384px.
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

train_dataloader.update(dataset=dict(pipeline=train_pipeline))
val_dataloader.update(dataset=dict(pipeline=test_pipeline))
test_dataloader.update(dataset=dict(pipeline=test_pipeline))

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.models import CrossEntropyLoss, Mixup

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # switch the base p16 model to the 'l' (large) arch with 32px patches
    backbone=dict(arch='l', patch_size=32),
    head=dict(
        hidden_dim=3072,
        # ViT-large embedding width
        in_channels=1024,
        topk=(1, 5),
    ),
    train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)),
)

# BUGFIX: this was previously assigned to a bare module-level `loss`
# variable, which never reached the model config. Assign it to the head,
# matching every sibling ViT config, so CrossEntropyLoss is actually used.
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

from mmpretrain.datasets import (CenterCrop, LoadImageFromFile, PackInputs,
                                 RandomFlip, RandomResizedCrop, ResizeEdge)
from mmpretrain.models import CrossEntropyLoss

with read_base():
    from .._base_.datasets.imagenet_bs64_pil_resize import *
    from .._base_.default_runtime import *
    from .._base_.models.vit_base_p16 import *
    from .._base_.schedules.imagenet_bs4096_adamw import *

# model setting
model.update(
    # 'l' (large) arch, 384px input, 32px patches
    backbone=dict(arch='l', img_size=384, patch_size=32),
    head=dict(in_channels=1024, topk=(1, 5)))

# override the head loss after the update so it fully replaces the base one
model.head.loss = dict(type=CrossEntropyLoss, loss_weight=1.0)

# dataset setting
data_preprocessor.update(
    # normalize to [-1, 1]: (x - 127.5) / 127.5 per channel
    mean=[127.5, 127.5, 127.5],
    std=[127.5, 127.5, 127.5],
    # convert image from BGR to RGB
    to_rgb=True,
)

# Replace the inherited pipelines so training/testing run at 384px.
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=RandomResizedCrop, scale=384, backend='pillow'),
    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
    dict(type=PackInputs),
]

test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=ResizeEdge, scale=384, edge='short', backend='pillow'),
    dict(type=CenterCrop, crop_size=384),
    dict(type=PackInputs),
]

train_dataloader.update(dataset=dict(pipeline=train_pipeline))
val_dataloader.update(dataset=dict(pipeline=test_pipeline))
test_dataloader.update(dataset=dict(pipeline=test_pipeline))

# schedule setting
optim_wrapper.update(clip_grad=dict(max_norm=1.0))
mmpretrain/datasets/__init__.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
# Public API of mmpretrain.datasets: re-export all dataset classes and
# register them via import side effects.
from mmpretrain.utils.dependency import WITH_MULTIMODAL

from .base_dataset import BaseDataset
from .builder import build_dataset
from .caltech101 import Caltech101
from .cifar import CIFAR10, CIFAR100
from .cub import CUB
from .custom import CustomDataset
from .dataset_wrappers import KFoldDataset
from .dtd import DTD
from .fgvcaircraft import FGVCAircraft
from .flowers102 import Flowers102
from .food101 import Food101
from .imagenet import ImageNet, ImageNet21k
from .inshop import InShop
from .mnist import MNIST, FashionMNIST
from .multi_label import MultiLabelDataset
from .multi_task import MultiTaskDataset
from .nlvr2 import NLVR2
from .oxfordiiitpet import OxfordIIITPet
from .places205 import Places205
from .samplers import *  # noqa: F401,F403
from .stanfordcars import StanfordCars
from .sun397 import SUN397
from .transforms import *  # noqa: F401,F403
from .voc import VOC

__all__ = [
    'BaseDataset', 'CIFAR10', 'CIFAR100', 'CUB', 'Caltech101',
    'CustomDataset', 'DTD', 'FGVCAircraft', 'FashionMNIST', 'Flowers102',
    'Food101', 'ImageNet', 'ImageNet21k', 'InShop', 'KFoldDataset', 'MNIST',
    'MultiLabelDataset', 'MultiTaskDataset', 'NLVR2', 'OxfordIIITPet',
    'Places205', 'SUN397', 'StanfordCars', 'VOC', 'build_dataset'
]

# Multimodal datasets are only importable when the optional multimodal
# dependencies are installed (gated by WITH_MULTIMODAL).
if WITH_MULTIMODAL:
    from .coco_caption import COCOCaption
    from .coco_retrieval import COCORetrieval
    from .coco_vqa import COCOVQA
    from .flamingo import FlamingoEvalCOCOCaption, FlamingoEvalCOCOVQA
    from .flickr30k_caption import Flickr30kCaption
    from .flickr30k_retrieval import Flickr30kRetrieval
    from .gqa_dataset import GQA
    from .iconqa import IconQA
    from .infographic_vqa import InfographicVQA
    from .minigpt4_dataset import MiniGPT4Dataset
    from .nocaps import NoCaps
    from .ocr_vqa import OCRVQA
    from .refcoco import RefCOCO
    from .scienceqa import ScienceQA
    from .textvqa import TextVQA
    from .visual_genome import VisualGenomeQA
    from .vizwiz import VizWiz
    from .vsr import VSR

    __all__.extend([
        'COCOCaption', 'COCORetrieval', 'COCOVQA', 'FlamingoEvalCOCOCaption',
        'FlamingoEvalCOCOVQA', 'Flickr30kCaption', 'Flickr30kRetrieval',
        'RefCOCO', 'VisualGenomeQA', 'ScienceQA', 'NoCaps', 'GQA', 'TextVQA',
        'VSR', 'VizWiz', 'OCRVQA', 'InfographicVQA', 'IconQA',
        'MiniGPT4Dataset'
    ])
mmpretrain/datasets/base_dataset.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
import
os.path
as
osp
from
os
import
PathLike
from
typing
import
List
,
Optional
,
Sequence
,
Union
import
mmengine
import
numpy
as
np
from
mmengine.dataset
import
BaseDataset
as
_BaseDataset
from
mmpretrain.registry
import
DATASETS
,
TRANSFORMS
def expanduser(path):
    """Expand ``~`` and ``~user`` constructions in *path*.

    Non-path values (anything that is not ``str`` or ``os.PathLike``) are
    passed through unchanged; if the user or ``$HOME`` cannot be resolved,
    ``osp.expanduser`` itself leaves the value as-is.
    """
    if not isinstance(path, (str, PathLike)):
        return path
    return osp.expanduser(path)
@DATASETS.register_module()
class BaseDataset(_BaseDataset):
    """Base dataset for image classification task.

    This dataset support annotation file in `OpenMMLab 2.0 style annotation
    format`.

    .. _OpenMMLab 2.0 style annotation format:
        https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/basedataset.md

    Comparing with the :class:`mmengine.BaseDataset`, this class implemented
    several useful methods.

    Args:
        ann_file (str): Annotation file path.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to ''.
        data_prefix (str | dict): Prefix for training data. Defaults to ''.
        filter_cfg (dict, optional): Config for filter data. Defaults to None.
        indices (int or Sequence[int], optional): Support using first few
            data in annotation file to facilitate training/testing on a
            smaller dataset. Defaults to None, which means using all
            ``data_infos``.
        serialize_data (bool): Whether to hold memory using serialized
            objects, when enabled, data loader workers can use shared RAM
            from master process instead of making a copy. Defaults to True.
        pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
        test_mode (bool, optional): ``test_mode=True`` means in test phase,
            an error will be raised when getting an item fails,
            ``test_mode=False`` means in training phase, another item will
            be returned randomly. Defaults to False.
        lazy_init (bool): Whether to load annotation during instantiation.
            In some cases, such as visualization, only the meta information
            of the dataset is needed, which is not necessary to load
            annotation file. ``Basedataset`` can skip load annotations to
            save time by set ``lazy_init=False``. Defaults to False.
        max_refetch (int): If ``Basedataset.prepare_data`` get a None img.
            The maximum extra number of cycles to get a valid image.
            Defaults to 1000.
        classes (str | Sequence[str], optional): Specify names of classes.

            - If is string, it should be a file path, and the every line of
              the file is a name of a class.
            - If is a sequence of string, every item is a name of class.
            - If is None, use categories information in ``metainfo``
              argument, annotation file or the class attribute ``METAINFO``.

            Defaults to None.
    """  # noqa: E501

    def __init__(self,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_root: str = '',
                 data_prefix: Union[str, dict] = '',
                 filter_cfg: Optional[dict] = None,
                 indices: Optional[Union[int, Sequence[int]]] = None,
                 serialize_data: bool = True,
                 pipeline: Sequence = (),
                 test_mode: bool = False,
                 lazy_init: bool = False,
                 max_refetch: int = 1000,
                 classes: Union[str, Sequence[str], None] = None):
        # A plain string prefix is normalized to the dict form expected by
        # the mmengine base class.
        if isinstance(data_prefix, str):
            data_prefix = dict(img_path=expanduser(data_prefix))

        ann_file = expanduser(ann_file)
        # Merge the legacy `classes` argument into `metainfo`.
        metainfo = self._compat_classes(metainfo, classes)

        # Build dict-style transforms through the TRANSFORMS registry;
        # already-instantiated transforms are used as-is.
        transforms = []
        for transform in pipeline:
            if isinstance(transform, dict):
                transforms.append(TRANSFORMS.build(transform))
            else:
                transforms.append(transform)

        super().__init__(
            ann_file=ann_file,
            metainfo=metainfo,
            data_root=data_root,
            data_prefix=data_prefix,
            filter_cfg=filter_cfg,
            indices=indices,
            serialize_data=serialize_data,
            pipeline=transforms,
            test_mode=test_mode,
            lazy_init=lazy_init,
            max_refetch=max_refetch)

    @property
    def img_prefix(self):
        """The prefix of images."""
        return self.data_prefix['img_path']

    @property
    def CLASSES(self):
        """Return all categories names."""
        return self._metainfo.get('classes', None)

    @property
    def class_to_idx(self):
        """Map mapping class name to class index.

        Returns:
            dict: mapping from class name to class index.
        """
        return {cat: i for i, cat in enumerate(self.CLASSES)}

    def get_gt_labels(self):
        """Get all ground-truth labels (categories).

        Returns:
            np.ndarray: categories for all images.
        """
        gt_labels = np.array(
            [self.get_data_info(i)['gt_label'] for i in range(len(self))])
        return gt_labels

    def get_cat_ids(self, idx: int) -> List[int]:
        """Get category id by index.

        Args:
            idx (int): Index of data.

        Returns:
            cat_ids (List[int]): Image category of specified index.
        """
        return [int(self.get_data_info(idx)['gt_label'])]

    def _compat_classes(self, metainfo, classes):
        """Merge the old style ``classes`` arguments to ``metainfo``."""
        if isinstance(classes, str):
            # take it as a file path
            class_names = mmengine.list_from_file(expanduser(classes))
        elif isinstance(classes, (tuple, list)):
            class_names = classes
        elif classes is not None:
            raise ValueError(f'Unsupported type {type(classes)} of classes.')

        if metainfo is None:
            metainfo = {}

        if classes is not None:
            # `classes` takes precedence as the 'classes' key, but any other
            # keys already in `metainfo` are kept.
            metainfo = {'classes': tuple(class_names), **metainfo}

        return metainfo

    def full_init(self):
        """Load annotation file and set ``BaseDataset._fully_initialized`` to
        True."""
        super().full_init()

        # To support the standard OpenMMLab 2.0 annotation format. Generate
        # metainfo in internal format from standard metainfo format.
        if 'categories' in self._metainfo and 'classes' not in self._metainfo:
            categories = sorted(
                self._metainfo['categories'], key=lambda x: x['id'])
            self._metainfo['classes'] = tuple(
                [cat['category_name'] for cat in categories])

    def __repr__(self):
        """Print the basic information of the dataset.

        Returns:
            str: Formatted string.
        """
        head = 'Dataset ' + self.__class__.__name__
        body = []
        if self._fully_initialized:
            body.append(f'Number of samples: \t{self.__len__()}')
        else:
            body.append("Haven't been initialized")

        if self.CLASSES is not None:
            body.append(f'Number of categories: \t{len(self.CLASSES)}')
        body.extend(self.extra_repr())

        if len(self.pipeline.transforms) > 0:
            body.append('With transforms:')
            for t in self.pipeline.transforms:
                body.append(f'    {t}')

        lines = [head] + [' ' * 4 + line for line in body]
        return '\n'.join(lines)

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = []
        body.append(f'Annotation file: \t{self.ann_file}')
        body.append(f'Prefix of images: \t{self.img_prefix}')
        return body
mmpretrain/datasets/builder.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
from
mmpretrain.registry
import
DATASETS
def build_dataset(cfg):
    """Construct a dataset instance from a config dict.

    The ``type`` key of *cfg* selects the dataset class registered in
    :obj:`DATASETS`; all remaining keys are forwarded to its constructor.

    Examples:
        >>> from mmpretrain.datasets import build_dataset
        >>> mnist_train = build_dataset(
        ...     dict(type='MNIST', data_prefix='data/mnist/', test_mode=False))
        >>> mnist_test = build_dataset(
        ...     dict(type='MNIST', data_prefix='data/mnist/', test_mode=True))
    """
    dataset = DATASETS.build(cfg)
    return dataset
mmpretrain/datasets/caltech101.py
0 → 100644
View file @
cbc25585
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
from
mmengine
import
get_file_backend
,
list_from_file
from
mmpretrain.registry
import
DATASETS
from
.base_dataset
import
BaseDataset
from
.categories
import
CALTECH101_CATEGORIES
@DATASETS.register_module()
class Caltech101(BaseDataset):
    """The Caltech101 Dataset.

    Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_
    Dataset. After downloading and decompression, the dataset directory
    structure is::

        caltech-101
        ├── 101_ObjectCategories
        │   ├── class_x
        │   │   ├── xx1.jpg
        │   │   └── ...
        │   └── ...
        ├── Annotations
        │   └── ...
        ├── meta
        │   ├── train.txt
        │   └── test.txt
        └── ....

    There is no official train/test split, so you can use the ``train.txt``
    and ``test.txt`` provided by us or create your own annotation files.
    Download `link
    <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
    for the annotations.

    Args:
        data_root (str): The root directory for the Caltech101 dataset.
        split (str, optional): The dataset split, supports "train" and
            "test". Default to "train".
    """  # noqa: E501

    METAINFO = {'classes': CALTECH101_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        # Annotation files live under <data_root>/meta/{train,test}.txt.
        ann_name = 'train.txt' if split == 'train' else 'test.txt'
        ann_file = self.backend.join_path('meta', ann_name)

        super().__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix='101_ObjectCategories',
            test_mode=(split == 'test'),
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        data_list = []
        # Each annotation line is "<relative/image/path> <label index>".
        for line in list_from_file(self.ann_file):
            rel_path, label = line.split()
            data_list.append(
                dict(
                    img_path=self.backend.join_path(self.img_prefix,
                                                    rel_path),
                    gt_label=int(label)))
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        return [f'Root of dataset: \t{self.data_root}']
Prev
1
2
3
4
5
6
7
8
9
10
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment