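# configs/convnext_v2/convnext-v2-large_32xb32_in1k.py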
_base_ = [
'../_base_/models/convnext_v2/large.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
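# configs/convnext_v2/convnext-v2-nano_32xb32_in1k-384px.py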
_base_ = [
'../_base_/models/convnext_v2/nano.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
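# configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py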
_base_ = [
'../_base_/models/convnext_v2/nano.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
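# configs/convnext_v2/convnext-v2-pico_32xb32_in1k.py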
_base_ = [
'../_base_/models/convnext_v2/pico.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
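# configs/convnext_v2/convnext-v2-tiny_32xb32_in1k-384px.py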
_base_ = [
'../_base_/models/convnext_v2/tiny.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=3.2e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=40,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=40)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
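# configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py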
_base_ = [
'../_base_/models/convnext_v2/tiny.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=3.2e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=40,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=40)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
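# Model index (metafile) for the ConvNeXt V2 configs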
Collections:
- Name: ConvNeXt V2
Metadata:
Architecture:
- Global Response Normalization
Paper:
Title: Co-designing and Scaling ConvNets with Masked Autoencoders
URL: http://arxiv.org/abs/2301.00808
README: configs/convnext_v2/README.md
Models:
- Name: convnext-v2-atto_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 551718080
Parameters: 3708400
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_3rdparty-fcmae_in1k_20230104-07514db4.pth
Config: configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_atto_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-atto_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 551718080
Parameters: 3708400
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 76.64
Top 5 Accuracy: 93.04
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth
Config: configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_atto_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-femto_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 784892544
Parameters: 5233240
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_3rdparty-fcmae_in1k_20230104-adbe2082.pth
Config: configs/convnext_v2/convnext-v2-femto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_femto_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-femto_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 784892544
Parameters: 5233240
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 78.48
Top 5 Accuracy: 93.98
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_fcmae-pre_3rdparty_in1k_20230104-92a75d75.pth
Config: configs/convnext_v2/convnext-v2-femto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_femto_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-pico_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1374072320
Parameters: 9066280
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_3rdparty-fcmae_in1k_20230104-147b1b59.pth
Config: configs/convnext_v2/convnext-v2-pico_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_pico_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-pico_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1374072320
Parameters: 9066280
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.31
Top 5 Accuracy: 95.08
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_fcmae-pre_3rdparty_in1k_20230104-d20263ca.pth
Config: configs/convnext_v2/convnext-v2-pico_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_pico_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_3rdparty-fcmae_in1k_20230104-3dd1f29e.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_nano_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 81.86
Top 5 Accuracy: 95.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-pre_3rdparty_in1k_20230104-fe1aaaf2.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_nano_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.04
Top 5 Accuracy: 96.16
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k_20230104-91fa8ae2.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_3rdparty-fcmae_in1k_20230104-80513adc.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_tiny_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.94
Top 5 Accuracy: 96.29
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-pre_3rdparty_in1k_20230104-471a86de.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_tiny_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.89
Top 5 Accuracy: 96.96
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k_20230104-8cc8b8f2.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 7214472320
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.36
Top 5 Accuracy: 96.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-f951ae87.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 13135236864
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.09
Top 5 Accuracy: 97.63
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-d8579f84.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_3rdparty-fcmae_in1k_20230104-8a798eaf.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_base_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.87
Top 5 Accuracy: 97.08
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-pre_3rdparty_in1k_20230104-00a70fa4.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_base_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.74
Top 5 Accuracy: 98.02
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k_20230104-c48d16a5.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_3rdparty-fcmae_in1k_20230104-bf38df92.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_large_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.76
Top 5 Accuracy: 97.59
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-pre_3rdparty_in1k_20230104-ef393013.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_large_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.26
Top 5 Accuracy: 98.24
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k_20230104-d9c4dc0c.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 45205885952
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.63
Top 5 Accuracy: 98.42
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-379425cc.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 101103214080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.18
Top 5 Accuracy: 98.52
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-9139a1f3.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 114998639360
Parameters: 660289640
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_3rdparty-fcmae_in1k_20230104-fe43ae6c.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_huge_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 114998639360
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.25
Top 5 Accuracy: 97.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-pre_3rdparty_in1k_20230104-f795e5b8.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_huge_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 337955157760
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.68
Top 5 Accuracy: 98.73
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-02a4eb35.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 600809158400
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.86
Top 5 Accuracy: 98.74
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px_20230104-ce32e63c.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_512_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
# CSPNet
> [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929)
<!-- [ALGORITHM] -->
## Abstract
Neural networks have enabled state-of-the-art approaches to achieve incredible results on computer vision tasks such as object detection. However, such success greatly relies on costly computation resources, which hinders people with cheap devices from appreciating the advanced technology. In this paper, we propose Cross Stage Partial Network (CSPNet) to mitigate the problem that previous works require heavy inference computations from the network architecture perspective. We attribute the problem to the duplicate gradient information within network optimization. The proposed networks respect the variability of the gradients by integrating feature maps from the beginning and the end of a network stage, which, in our experiments, reduces computations by 20% with equivalent or even superior accuracy on the ImageNet dataset, and significantly outperforms state-of-the-art approaches in terms of AP50 on the MS COCO object detection dataset. The CSPNet is easy to implement and general enough to cope with architectures based on ResNet, ResNeXt, and DenseNet. Source code is at this https URL.
<div align=center>
<img src="https://user-images.githubusercontent.com/18586273/159420842-6147c687-a488-460c-8bb2-4ea5276c26c7.png" width="60%"/>
</div>
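The cross-stage design above can be summarized in a few lines: split a stage's feature map into two parts, send only one part through the stage's blocks, and merge the two paths with a transition layer at the end of the stage. The following is a minimal PyTorch sketch of that idea with a generic convolutional block; it is illustrative only and does not mirror the exact `CSPDarkNet`/`CSPResNet` backbones used in the configs below.

```python
import torch
import torch.nn as nn


class CSPStage(nn.Module):
    """Conceptual cross-stage-partial stage (simplified sketch)."""

    def __init__(self, channels: int, num_blocks: int):
        super().__init__()
        half = channels // 2
        # stand-in for the stage's usual residual/dense blocks
        self.blocks = nn.Sequential(*[
            nn.Sequential(
                nn.Conv2d(half, half, 3, padding=1, bias=False),
                nn.BatchNorm2d(half),
                nn.ReLU(inplace=True),
            ) for _ in range(num_blocks)
        ])
        # transition conv that fuses the two gradient paths
        self.transition = nn.Conv2d(channels, channels, 1, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # split the feature map: only `part2` goes through the blocks,
        # `part1` is routed directly to the end of the stage
        part1, part2 = x.chunk(2, dim=1)
        part2 = self.blocks(part2)
        return self.transition(torch.cat([part1, part2], dim=1))


feats = CSPStage(channels=64, num_blocks=2)(torch.rand(1, 64, 56, 56))
print(feats.shape)  # torch.Size([1, 64, 56, 56])
```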
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('cspdarknet50_3rdparty_8xb32_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('cspdarknet50_3rdparty_8xb32_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Test:
```shell
python tools/test.py configs/cspnet/cspdarknet50_8xb32_in1k.py https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------------------------------- | :----------: | :--------: | :-------: | :-------: | :-------: | :----------------------------------: | :-----------------------------------------------------------------------------: |
| `cspdarknet50_3rdparty_8xb32_in1k`\* | From scratch | 27.64 | 5.04 | 80.05 | 95.07 | [config](cspdarknet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth) |
| `cspresnet50_3rdparty_8xb32_in1k`\* | From scratch | 21.62 | 3.48 | 79.55 | 94.68 | [config](cspresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth) |
| `cspresnext50_3rdparty_8xb32_in1k`\* | From scratch | 20.57 | 3.11 | 79.96 | 94.96 | [config](cspresnext50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth) |
*Models with * are converted from the [official repo](https://github.com/rwightman/pytorch-image-models). The config files of these models are only for inference. We haven't reproduced the training results.*
## Citation
```bibtex
@inproceedings{wang2020cspnet,
title={CSPNet: A new backbone that can enhance learning capability of CNN},
author={Wang, Chien-Yao and Liao, Hong-Yuan Mark and Wu, Yueh-Hua and Chen, Ping-Yang and Hsieh, Jun-Wei and Yeh, I-Hau},
booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops},
pages={390--391},
year={2020}
}
```
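# configs/cspnet/cspdarknet50_8xb32_in1k.py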
_base_ = [
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='CSPDarkNet', depth=53),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))
# dataset settings
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=288,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
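# configs/cspnet/cspresnet50_8xb32_in1k.py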
_base_ = [
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='CSPResNet', depth=50),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))
# dataset settings
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=288,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
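# configs/cspnet/cspresnext50_8xb32_in1k.py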
_base_ = [
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='CSPResNeXt', depth=50),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))
# dataset settings
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
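# Model index (metafile) for the CSPNet configs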
Collections:
- Name: CSPNet
Metadata:
Training Data: ImageNet-1k
Architecture:
- Cross Stage Partial Stage
Paper:
URL: https://arxiv.org/abs/1911.11929
Title: 'CSPNet: A New Backbone that can Enhance Learning Capability of CNN'
README: configs/cspnet/README.md
Code:
Version: v0.22.0
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.22.0/mmcls/models/backbones/cspnet.py
Models:
- Name: cspdarknet50_3rdparty_8xb32_in1k
Metadata:
FLOPs: 5040000000
Parameters: 27640000
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.05
Top 5 Accuracy: 95.07
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth
Config: configs/cspnet/cspdarknet50_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth
Code: https://github.com/rwightman/pytorch-image-models
- Name: cspresnet50_3rdparty_8xb32_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 3480000000
Parameters: 21620000
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 79.55
Top 5 Accuracy: 94.68
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth
Config: configs/cspnet/cspresnet50_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnet50_ra-d3e8d487.pth
Code: https://github.com/rwightman/pytorch-image-models
- Name: cspresnext50_3rdparty_8xb32_in1k
Metadata:
FLOPs: 3110000000
Parameters: 20570000
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 79.96
Top 5 Accuracy: 94.96
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth
Config: configs/cspnet/cspresnext50_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnext50_ra_224-648b4713.pth
Code: https://github.com/rwightman/pytorch-image-models
# CSRA
> [Residual Attention: A Simple but Effective Method for Multi-Label Recognition](https://arxiv.org/abs/2108.02456)
<!-- [ALGORITHM] -->
## Abstract
Multi-label image recognition is a challenging computer vision task of practical use. Progresses in this area, however, are often characterized by complicated methods, heavy computations, and lack of intuitive explanations. To effectively capture different spatial regions occupied by objects from different categories, we propose an embarrassingly simple module, named class-specific residual attention (CSRA). CSRA generates class-specific features for every category by proposing a simple spatial attention score, and then combines it with the class-agnostic average pooling feature. CSRA achieves state-of-the-art results on multilabel recognition, and at the same time is much simpler than them. Furthermore, with only 4 lines of code, CSRA also leads to consistent improvement across many diverse pretrained models and datasets without any extra training. CSRA is both easy to implement and light in computations, which also enjoys intuitive explanations and visualizations.
<div align=center>
<img src="https://user-images.githubusercontent.com/84259897/176982245-3ffcff56-a4ea-4474-9967-bc2b612bbaa3.png" width="80%"/>
</div>
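The residual attention above amounts to only a few lines of code: per-location classifier scores are average-pooled into a class-agnostic logit, a class-specific spatial softmax produces an attention-pooled logit, and the two are combined with a small residual weight. Below is a minimal single-head sketch assuming backbone features of shape `(B, C, H, W)`; the names `lam` and `T` follow the description above, not the exact `CSRAClsHead` API.

```python
import torch
import torch.nn as nn


class SimpleCSRAHead(nn.Module):
    """Minimal single-head sketch of class-specific residual attention."""

    def __init__(self, in_channels: int, num_classes: int,
                 lam: float = 0.1, T: float = 1.0):
        super().__init__()
        # per-location classifier scores, one score map per class
        self.classifier = nn.Conv2d(in_channels, num_classes, 1, bias=False)
        self.lam = lam
        self.T = T

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (B, C, H, W)
        score = self.classifier(x).flatten(2)        # (B, num_classes, H*W)
        base_logit = score.mean(dim=-1)              # class-agnostic average pooling
        attn = (self.T * score).softmax(dim=-1)      # class-specific spatial attention
        attn_logit = (score * attn).sum(dim=-1)      # attention-pooled class score
        return base_logit + self.lam * attn_logit    # residual combination


logits = SimpleCSRAHead(2048, 20)(torch.rand(2, 2048, 14, 14))
print(logits.shape)  # torch.Size([2, 20])
```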
## How to use it?
<!-- [TABS-BEGIN] -->
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('resnet101-csra_1xb16_voc07-448px', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/csra/resnet101-csra_1xb16_voc07-448px.py
```
Test:
```shell
python tools/test.py configs/csra/resnet101-csra_1xb16_voc07-448px.py https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.pth
```
<!-- [TABS-END] -->
## Models and results
### Multi-Label Classification on PASCAL VOC 2007
| Model | Pretrain | Params (M) | Flops (G) | CF1 | OF1 | mAP | Config | Download |
| :--------------------------------- | :----------: | :--------: | :-------: | :---: | :---: | :---: | :-------------------------------------------: | :-------------------------------------------------------------------------: |
| `resnet101-csra_1xb16_voc07-448px` | From scratch | 23.55 | 4.12 | 89.16 | 90.80 | 94.98 | [config](resnet101-csra_1xb16_voc07-448px.py) | [model](https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.json) |
## Citation
```bibtex
@misc{https://doi.org/10.48550/arxiv.2108.02456,
doi = {10.48550/ARXIV.2108.02456},
url = {https://arxiv.org/abs/2108.02456},
author = {Zhu, Ke and Wu, Jianxin},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Residual Attention: A Simple but Effective Method for Multi-Label Recognition},
publisher = {arXiv},
year = {2021},
copyright = {arXiv.org perpetual, non-exclusive license}
}
```
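# Model index (metafile) for the CSRA configs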
Collections:
- Name: CSRA
Metadata:
Training Data: PASCAL VOC 2007
Architecture:
- Class-specific Residual Attention
Paper:
URL: https://arxiv.org/abs/2108.02456
Title: 'Residual Attention: A Simple but Effective Method for Multi-Label Recognition'
README: configs/csra/README.md
Code:
Version: v0.24.0
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.24.0/mmcls/models/heads/multi_label_csra_head.py
Models:
- Name: resnet101-csra_1xb16_voc07-448px
Metadata:
FLOPs: 4120000000
Parameters: 23550000
In Collection: CSRA
Results:
- Dataset: PASCAL VOC 2007
Metrics:
mAP: 94.98
OF1: 90.80
CF1: 89.16
Task: Multi-Label Classification
Weights: https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.pth
Config: configs/csra/resnet101-csra_1xb16_voc07-448px.py
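# configs/csra/resnet101-csra_1xb16_voc07-448px.py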
_base_ = ['../_base_/datasets/voc_bs16.py', '../_base_/default_runtime.py']
# Pre-trained Checkpoint Path
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth' # noqa
# If you want to use the pre-trained ResNet101-CutMix weight from the
# original repo (https://github.com/Kevinz-code/CSRA), the script
# 'tools/model_converters/torchvision_to_mmpretrain.py' can help you convert
# the weight into mmpretrain format. The mAP would reach 95.5 with that
# weight. checkpoint = 'PATH/TO/PRE-TRAINED_WEIGHT'
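# A hypothetical example of the conversion (the converter script is assumed to
# take a source checkpoint path followed by a destination path):
#   python tools/model_converters/torchvision_to_mmpretrain.py \
#       PATH/TO/ORIGINAL_WEIGHT.pth PATH/TO/CONVERTED_WEIGHT.pth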
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(3, ),
style='pytorch',
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
neck=None,
head=dict(
type='CSRAClsHead',
num_classes=20,
in_channels=2048,
num_heads=1,
lam=0.1,
loss=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
# dataset setting
data_preprocessor = dict(
# RGB format normalization parameters
mean=[0, 0, 0],
std=[255, 255, 255])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='RandomResizedCrop', scale=448, crop_ratio_range=(0.7, 1.0)),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=448),
dict(
type='PackInputs',
# `gt_label_difficult` is needed for VOC evaluation
meta_keys=('sample_idx', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction',
'gt_label_difficult')),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# optimizer
# the lr of classifier.head is 10 * base_lr, which helps convergence.
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.0002, momentum=0.9, weight_decay=0.0001),
paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10)}))
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-7,
by_epoch=True,
begin=0,
end=1,
convert_to_iter_based=True),
dict(type='StepLR', by_epoch=True, step_size=6, gamma=0.1)
]
train_cfg = dict(by_epoch=True, max_epochs=20, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# DaViT
> [DaViT: Dual Attention Vision Transformers](https://arxiv.org/abs/2204.03645v1)
<!-- [ALGORITHM] -->
## Abstract
In this work, we introduce Dual Attention Vision Transformers (DaViT), a simple yet effective vision transformer architecture that is able to capture global context while maintaining computational efficiency. We propose approaching the problem from an orthogonal angle: exploiting self-attention mechanisms with both "spatial tokens" and "channel tokens". With spatial tokens, the spatial dimension defines the token scope, and the channel dimension defines the token feature dimension. With channel tokens, we have the inverse: the channel dimension defines the token scope, and the spatial dimension defines the token feature dimension. We further group tokens along the sequence direction for both spatial and channel tokens to maintain the linear complexity of the entire model. We show that these two self-attentions complement each other: (i) since each channel token contains an abstract representation of the entire image, the channel attention naturally captures global interactions and representations by taking all spatial positions into account when computing attention scores between channels; (ii) the spatial attention refines the local representations by performing fine-grained interactions across spatial locations, which in turn helps the global information modeling in channel attention. Extensive experiments show our DaViT achieves state-of-the-art performance on four different tasks with efficient computations. Without extra data, DaViT-Tiny, DaViT-Small, and DaViT-Base achieve 82.8%, 84.2%, and 84.6% top-1 accuracy on ImageNet-1K with 28.3M, 49.7M, and 87.9M parameters, respectively. When we further scale up DaViT with 1.5B weakly supervised image and text pairs, DaViT-Giant reaches 90.4% top-1 accuracy on ImageNet-1K.
<div align=center>
<img src="https://user-images.githubusercontent.com/24734142/196125065-e232409b-f710-4729-b657-4e5f9158f2d1.png" width="90%"/>
</div>
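The channel-token attention described above can be sketched by transposing the usual token layout: channels (grouped to keep linear complexity) act as tokens, and the spatial dimension provides their features. The snippet below is a conceptual sketch with the projection layers omitted and a simple `N ** -0.5` scaling assumed; it is not the official DaViT module.

```python
import torch


def channel_group_attention(x: torch.Tensor, num_groups: int = 8) -> torch.Tensor:
    """Conceptual channel-token self-attention over (B, N, C) inputs."""
    B, N, C = x.shape
    g = C // num_groups
    # treat each channel inside a group as a token: (B, num_groups, g, N)
    tokens = x.transpose(1, 2).reshape(B, num_groups, g, N)
    q = k = v = tokens                               # projections omitted for brevity
    attn = (q * N ** -0.5) @ k.transpose(-1, -2)     # (B, num_groups, g, g)
    attn = attn.softmax(dim=-1)
    out = attn @ v                                   # (B, num_groups, g, N)
    return out.reshape(B, C, N).transpose(1, 2)      # back to (B, N, C)


y = channel_group_attention(torch.rand(2, 196, 64), num_groups=8)
print(y.shape)  # torch.Size([2, 196, 64])
```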
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('davit-tiny_3rdparty_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('davit-tiny_3rdparty_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Test:
```shell
python tools/test.py configs/davit/davit-tiny_4xb256_in1k.py https://download.openmmlab.com/mmclassification/v0/davit/davit-tiny_3rdparty_in1k_20221116-700fdf7d.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------------------- | :----------: | :--------: | :-------: | :-------: | :-------: | :----------------------------------: | :------------------------------------------------------------------------------------: |
| `davit-tiny_3rdparty_in1k`\* | From scratch | 28.36 | 4.54 | 82.24 | 96.13 | [config](davit-tiny_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/davit/davit-tiny_3rdparty_in1k_20221116-700fdf7d.pth) |
| `davit-small_3rdparty_in1k`\* | From scratch | 49.75 | 8.80 | 83.61 | 96.75 | [config](davit-small_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/davit/davit-small_3rdparty_in1k_20221116-51a849a6.pth) |
| `davit-base_3rdparty_in1k`\* | From scratch | 87.95 | 15.51 | 84.09 | 96.82 | [config](davit-base_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/davit/davit-base_3rdparty_in1k_20221116-19e0d956.pth) |
*Models with * are converted from the [official repo](https://github.com/dingmyu/davit/blob/main/mmdet/mmdet/models/backbones/davit.py#L355). The config files of these models are only for inference. We haven't reproduced the training results.*
## Citation
```bibtex
@inproceedings{ding2022davit,
title={DaViT: Dual Attention Vision Transformer},
author={Ding, Mingyu and Xiao, Bin and Codella, Noel and Luo, Ping and Wang, Jingdong and Yuan, Lu},
booktitle={ECCV},
year={2022},
}
```
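# configs/davit/davit-base_4xb256_in1k.py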
_base_ = [
'../_base_/models/davit/davit-base.py',
'../_base_/datasets/imagenet_bs256_davit_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py'
]
# data settings
train_dataloader = dict(batch_size=256)
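# configs/davit/davit-small_4xb256_in1k.py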
_base_ = [
'../_base_/models/davit/davit-small.py',
'../_base_/datasets/imagenet_bs256_davit_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py'
]
# data settings
train_dataloader = dict(batch_size=256)
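# configs/davit/davit-tiny_4xb256_in1k.py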
_base_ = [
'../_base_/models/davit/davit-tiny.py',
'../_base_/datasets/imagenet_bs256_davit_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py'
]
# data settings
train_dataloader = dict(batch_size=256)
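# Model index (metafile) for the DaViT configs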
Collections:
- Name: DaViT
Metadata:
Architecture:
- GELU
- Layer Normalization
- Multi-Head Attention
- Scaled Dot-Product Attention
Paper:
URL: https://arxiv.org/abs/2204.03645v1
Title: 'DaViT: Dual Attention Vision Transformers'
README: configs/davit/README.md
Code:
URL: https://github.com/open-mmlab/mmpretrain/blob/v1.0.0rc3/mmcls/models/backbones/davit.py
Version: v1.0.0rc3
Models:
- Name: davit-tiny_3rdparty_in1k
In Collection: DaViT
Metadata:
FLOPs: 4539698688
Parameters: 28360168
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 82.24
Top 5 Accuracy: 96.13
Weights: https://download.openmmlab.com/mmclassification/v0/davit/davit-tiny_3rdparty_in1k_20221116-700fdf7d.pth
Converted From:
Weights: https://drive.google.com/file/d/1RSpi3lxKaloOL5-or20HuG975tbPwxRZ/view?usp=sharing
Code: https://github.com/dingmyu/davit/blob/main/mmdet/mmdet/models/backbones/davit.py#L355
Config: configs/davit/davit-tiny_4xb256_in1k.py
- Name: davit-small_3rdparty_in1k
In Collection: DaViT
Metadata:
FLOPs: 8799942144
Parameters: 49745896
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 83.61
Top 5 Accuracy: 96.75
Weights: https://download.openmmlab.com/mmclassification/v0/davit/davit-small_3rdparty_in1k_20221116-51a849a6.pth
Converted From:
Weights: https://drive.google.com/file/d/1q976ruj45mt0RhO9oxhOo6EP_cmj4ahQ/view?usp=sharing
Code: https://github.com/dingmyu/davit/blob/main/mmdet/mmdet/models/backbones/davit.py#L355
Config: configs/davit/davit-small_4xb256_in1k.py
- Name: davit-base_3rdparty_in1k
In Collection: DaViT
Metadata:
FLOPs: 15509702656
Parameters: 87954408
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 84.09
Top 5 Accuracy: 96.82
Weights: https://download.openmmlab.com/mmclassification/v0/davit/davit-base_3rdparty_in1k_20221116-19e0d956.pth
Converted From:
Weights: https://drive.google.com/file/d/1u9sDBEueB-YFuLigvcwf4b2YyA4MIVsZ/view?usp=sharing
Code: https://github.com/dingmyu/davit/blob/main/mmdet/mmdet/models/backbones/davit.py#L355
Config: configs/davit/davit-base_4xb256_in1k.py