Commit dff2c686 authored by renzhc's avatar renzhc
Browse files

first commit

parent 8f9dd0ed
Pipeline #1665 canceled with stages
# Inherited pieces (per the base file names): ConvNeXt-Large model,
# 384px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-large.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 64}

# Optimization: override the learning rate and clip gradients to a
# maximum norm of 5.0.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': {'max_norm': 5.0},
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 4e-5, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 64 GPUs x 64 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-Large model,
# 224px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-large.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 64}

# Optimization: override the learning rate; `clip_grad` is explicitly
# set to None in this config.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': None,
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 64 GPUs x 64 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# ImageNet-21k training config with 64 samples per GPU.
#
# NOTE(review): the file name is not visible here. The 64-sample batch size
# and the neighbouring ConvNeXt-Large configs indicate this is the large
# in21k config, so it must inherit the ConvNeXt-Large model rather than the
# ConvNeXt-Base one it previously pointed at (a copy-paste slip that would
# build a network that cannot load the large checkpoints). Confirm against
# the actual file name.
_base_ = [
    '../_base_/models/convnext/convnext-large.py',
    '../_base_/datasets/imagenet21k_bs128.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# model setting
# The classification head is widened to the 21841 ImageNet-21k classes.
model = dict(head=dict(num_classes=21841))

# dataset setting
# The inherited dataset file is named for 128 samples per GPU; this config
# overrides it to 64.
data_preprocessor = dict(num_classes=21841)
train_dataloader = dict(batch_size=64)

# schedule setting
optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
)

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (64 GPUs) x (64 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)
# Inherited pieces (per the base file names): ConvNeXt-Small model,
# 384px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-small.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 128}

# Optimization: override the learning rate and clip gradients to a
# maximum norm of 5.0.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': {'max_norm': 5.0},
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 4e-5, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 32 GPUs x 128 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-Small model,
# 224px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-small.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 128}

# Optimization: override the learning rate; `clip_grad` is explicitly
# set to None in this config.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': None,
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 32 GPUs x 128 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-Tiny model,
# 384px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-tiny.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 128}

# Optimization: override the learning rate and clip gradients to a
# maximum norm of 5.0.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': {'max_norm': 5.0},
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 4e-5, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 32 GPUs x 128 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-Tiny model,
# 224px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-tiny.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 128}

# Optimization: override the learning rate; `clip_grad` is explicitly
# set to None in this config.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': None,
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 32 GPUs x 128 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-XLarge model,
# 384px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-xlarge.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 64}

# Optimization: override the learning rate and clip gradients to a
# maximum norm of 5.0.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': {'max_norm': 5.0},
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 4e-5, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 64 GPUs x 64 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# Inherited pieces (per the base file names): ConvNeXt-XLarge model,
# 224px ImageNet data pipeline, AdamW schedule and the default runtime.
_base_ = [
    '../_base_/models/convnext/convnext-xlarge.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Data: number of training samples per GPU.
train_dataloader = {'batch_size': 64}

# Optimization: override the learning rate; `clip_grad` is explicitly
# set to None in this config.
optim_wrapper = {
    'optimizer': {'lr': 4e-3},
    'clip_grad': None,
}

# Runtime: register an EMAHook with the momentum used for this setup.
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]

# Automatic LR scaling is computed relative to this reference batch size:
# 64 GPUs x 64 samples per GPU = 4096.
auto_scale_lr = {'base_batch_size': 4096}
# ImageNet-21k training config with 64 samples per GPU.
#
# NOTE(review): the file name is not visible here. The 64-sample batch size
# and the neighbouring ConvNeXt-XLarge configs indicate this is the xlarge
# in21k config, so it must inherit the ConvNeXt-XLarge model rather than the
# ConvNeXt-Base one it previously pointed at (a copy-paste slip that would
# build a network that cannot load the xlarge checkpoints). Confirm against
# the actual file name.
_base_ = [
    '../_base_/models/convnext/convnext-xlarge.py',
    '../_base_/datasets/imagenet21k_bs128.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# model setting
# The classification head is widened to the 21841 ImageNet-21k classes.
model = dict(head=dict(num_classes=21841))

# dataset setting
# The inherited dataset file is named for 128 samples per GPU; this config
# overrides it to 64.
data_preprocessor = dict(num_classes=21841)
train_dataloader = dict(batch_size=64)

# schedule setting
optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
)

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (64 GPUs) x (64 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)
Collections:
- Name: ConvNeXt
Metadata:
Training Data: ImageNet-1k
Architecture:
- 1x1 Convolution
- LayerScale
Paper:
URL: https://arxiv.org/abs/2201.03545v1
Title: A ConvNet for the 2020s
README: configs/convnext/README.md
Code:
Version: v0.20.1
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.20.1/mmcls/models/backbones/convnext.py
Models:
- Name: convnext-tiny_32xb128_in1k
Metadata:
FLOPs: 4457472768
Parameters: 28589128
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.14
Top 5 Accuracy: 96.06
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128_in1k_20221207-998cf3e9.pth
Config: configs/convnext/convnext-tiny_32xb128_in1k.py
- Name: convnext-tiny_32xb128-noema_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 4457472768
Parameters: 28589128
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 81.95
Top 5 Accuracy: 95.89
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128-noema_in1k_20221208-5d4509c7.pth
Config: configs/convnext/convnext-tiny_32xb128_in1k.py
- Name: convnext-tiny_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 4457472768
Parameters: 28589128
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.90
Top 5 Accuracy: 96.62
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_in21k-pre_3rdparty_in1k_20221219-7501e534.pth
Config: configs/convnext/convnext-tiny_32xb128_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-tiny_in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 13135236864
Parameters: 28589128
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.11
Top 5 Accuracy: 97.14
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_in21k-pre_3rdparty_in1k-384px_20221219-c1182362.pth
Config: configs/convnext/convnext-tiny_32xb128_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-small_32xb128_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 8687008512
Parameters: 50223688
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.16
Top 5 Accuracy: 96.56
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128_in1k_20221207-4ab7052c.pth
Config: configs/convnext/convnext-small_32xb128_in1k.py
- Name: convnext-small_32xb128-noema_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 8687008512
Parameters: 50223688
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.21
Top 5 Accuracy: 96.48
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128-noema_in1k_20221208-4a618995.pth
Config: configs/convnext/convnext-small_32xb128_in1k.py
- Name: convnext-small_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 8687008512
Parameters: 50223688
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.59
Top 5 Accuracy: 97.41
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_in21k-pre_3rdparty_in1k_20221219-aeca4c93.pth
Config: configs/convnext/convnext-small_32xb128_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-small_in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 25580818176
Parameters: 50223688
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.75
Top 5 Accuracy: 97.88
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_in21k-pre_3rdparty_in1k-384px_20221219-96f0bb87.pth
Config: configs/convnext/convnext-small_32xb128_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_32xb128_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.66
Top 5 Accuracy: 96.74
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128_in1k_20221207-fbdb5eb9.pth
Config: configs/convnext/convnext-base_32xb128_in1k.py
- Name: convnext-base_32xb128-noema_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.64
Top 5 Accuracy: 96.61
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128-noema_in1k_20221208-f8182678.pth
Config: configs/convnext/convnext-base_32xb128_in1k.py
- Name: convnext-base_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.85
Top 5 Accuracy: 96.74
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth
Config: configs/convnext/convnext-base_32xb128_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_3rdparty-noema_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.71
Top 5 Accuracy: 96.60
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth
Config: configs/convnext/convnext-base_32xb128_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_3rdparty_in1k-384px
Metadata:
Training Data: ImageNet-1k
FLOPs: 45205885952
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.10
Top 5 Accuracy: 97.34
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in1k-384px_20221219-c8f1dc2b.pth
Config: configs/convnext/convnext-base_32xb128_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_3rdparty_in21k
Metadata:
Training Data: ImageNet-21k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth
Config: configs/convnext/convnext-base_32xb128_in21k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.81
Top 5 Accuracy: 97.86
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth
Config: configs/convnext/convnext-base_32xb128_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_in21k-pre-3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 45205885952
Parameters: 88591464
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.82
Top 5 Accuracy: 98.25
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_in1k-384px_20221219-4570f792.pth
Config: configs/convnext/convnext-base_32xb128_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34368026112
Parameters: 197767336
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.30
Top 5 Accuracy: 96.89
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth
Config: configs/convnext/convnext-large_64xb64_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_3rdparty_in1k-384px
Metadata:
Training Data: ImageNet-1k
FLOPs: 101103214080
Parameters: 197767336
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.50
Top 5 Accuracy: 97.59
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in1k-384px_20221219-6dd29d10.pth
Config: configs/convnext/convnext-large_64xb64_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_3rdparty_in21k
Metadata:
Training Data: ImageNet-21k
FLOPs: 34368026112
Parameters: 197767336
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth
Config: configs/convnext/convnext-large_64xb64_in21k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 34368026112
Parameters: 197767336
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.61
Top 5 Accuracy: 98.04
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth
Config: configs/convnext/convnext-large_64xb64_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_in21k-pre-3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 101103214080
Parameters: 197767336
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.46
Top 5 Accuracy: 98.37
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_in1k-384px_20221219-6d38dd66.pth
Config: configs/convnext/convnext-large_64xb64_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_384.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-xlarge_3rdparty_in21k
Metadata:
Training Data: ImageNet-21k
FLOPs: 60929820672
Parameters: 350196968
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth
Config: configs/convnext/convnext-xlarge_64xb64_in21k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-xlarge_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 60929820672
Parameters: 350196968
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.97
Top 5 Accuracy: 98.20
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth
Config: configs/convnext/convnext-xlarge_64xb64_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_224_ema.pth
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-xlarge_in21k-pre-3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 179196798976
Parameters: 350196968
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.76
Top 5 Accuracy: 98.55
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_in1k-384px_20221219-b161bc14.pth
Config: configs/convnext/convnext-xlarge_64xb64_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_384_ema.pth
Code: https://github.com/facebookresearch/ConvNeXt
# ConvNeXt V2
> [Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
<!-- [ALGORITHM] -->
## Abstract
Driven by improved architectures and better representation learning frameworks, the field of visual recognition has enjoyed rapid modernization and performance boost in the early 2020s. For example, modern ConvNets, represented by ConvNeXt, have demonstrated strong performance in various scenarios. While these models were originally designed for supervised learning with ImageNet labels, they can also potentially benefit from self-supervised learning techniques such as masked autoencoders (MAE). However, we found that simply combining these two approaches leads to subpar performance. In this paper, we propose a fully convolutional masked autoencoder framework and a new Global Response Normalization (GRN) layer that can be added to the ConvNeXt architecture to enhance inter-channel feature competition. This co-design of self-supervised learning techniques and architectural improvement results in a new model family called ConvNeXt V2, which significantly improves the performance of pure ConvNets on various recognition benchmarks, including ImageNet classification, COCO detection, and ADE20K segmentation. We also provide pre-trained ConvNeXt V2 models of various sizes, ranging from an efficient 3.7M-parameter Atto model with 76.7% top-1 accuracy on ImageNet, to a 650M Huge model that achieves a state-of-the-art 88.9% accuracy using only public training data.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/210496285-f235083f-218f-4153-8e21-c8a64481a2f5.png" width="50%"/>
</div>
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('convnext-v2-atto_fcmae-pre_3rdparty_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('convnext-v2-atto_3rdparty-fcmae_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Test:
```shell
python tools/test.py configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth
```
<!-- [TABS-END] -->
## Models and results
### Pretrained models
| Model | Params (M) | Flops (G) | Config | Download |
| :---------------------------------------- | :--------: | :-------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------: |
| `convnext-v2-atto_3rdparty-fcmae_in1k`\* | 3.71 | 0.55 | [config](convnext-v2-atto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_3rdparty-fcmae_in1k_20230104-07514db4.pth) |
| `convnext-v2-femto_3rdparty-fcmae_in1k`\* | 5.23 | 0.78 | [config](convnext-v2-femto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_3rdparty-fcmae_in1k_20230104-adbe2082.pth) |
| `convnext-v2-pico_3rdparty-fcmae_in1k`\* | 9.07 | 1.37 | [config](convnext-v2-pico_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_3rdparty-fcmae_in1k_20230104-147b1b59.pth) |
| `convnext-v2-nano_3rdparty-fcmae_in1k`\* | 15.62 | 2.45 | [config](convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_3rdparty-fcmae_in1k_20230104-3dd1f29e.pth) |
| `convnext-v2-tiny_3rdparty-fcmae_in1k`\* | 28.64 | 4.47 | [config](convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_3rdparty-fcmae_in1k_20230104-80513adc.pth) |
| `convnext-v2-base_3rdparty-fcmae_in1k`\* | 88.72 | 15.38 | [config](convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_3rdparty-fcmae_in1k_20230104-8a798eaf.pth) |
| `convnext-v2-large_3rdparty-fcmae_in1k`\* | 197.96 | 34.40 | [config](convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_3rdparty-fcmae_in1k_20230104-bf38df92.pth) |
| `convnext-v2-huge_3rdparty-fcmae_in1k`\* | 660.29 | 115.00 | [config](convnext-v2-huge_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_3rdparty-fcmae_in1k_20230104-fe43ae6c.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt-V2). The config files of these models are only for inference. We haven't reproduced the training results.*
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------------------------------------- | :----------------: | :--------: | :-------: | :-------: | :-------: | :----------------------------------------------: | :------------------------------------------------: |
| `convnext-v2-atto_fcmae-pre_3rdparty_in1k`\* | FCMAE | 3.71 | 0.55 | 76.64 | 93.04 | [config](convnext-v2-atto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth) |
| `convnext-v2-femto_fcmae-pre_3rdparty_in1k`\* | FCMAE | 5.23 | 0.78 | 78.48 | 93.98 | [config](convnext-v2-femto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_fcmae-pre_3rdparty_in1k_20230104-92a75d75.pth) |
| `convnext-v2-pico_fcmae-pre_3rdparty_in1k`\* | FCMAE | 9.07 | 1.37 | 80.31 | 95.08 | [config](convnext-v2-pico_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_fcmae-pre_3rdparty_in1k_20230104-d20263ca.pth) |
| `convnext-v2-nano_fcmae-pre_3rdparty_in1k`\* | FCMAE | 15.62 | 2.45 | 81.86 | 95.75 | [config](convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-pre_3rdparty_in1k_20230104-fe1aaaf2.pth) |
| `convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k`\* | FCMAE ImageNet-21k | 15.62 | 2.45 | 82.04 | 96.16 | [config](convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k_20230104-91fa8ae2.pth) |
| `convnext-v2-tiny_fcmae-pre_3rdparty_in1k`\* | FCMAE | 28.64 | 4.47 | 82.94 | 96.29 | [config](convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-pre_3rdparty_in1k_20230104-471a86de.pth) |
| `convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k`\* | FCMAE ImageNet-21k | 28.64 | 4.47 | 83.89 | 96.96 | [config](convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k_20230104-8cc8b8f2.pth) |
| `convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px`\* | FCMAE ImageNet-21k | 15.62 | 7.21 | 83.36 | 96.75 | [config](convnext-v2-nano_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-f951ae87.pth) |
| `convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px`\* | FCMAE ImageNet-21k | 28.64 | 13.14 | 85.09 | 97.63 | [config](convnext-v2-tiny_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-d8579f84.pth) |
| `convnext-v2-base_fcmae-pre_3rdparty_in1k`\* | FCMAE | 88.72 | 15.38 | 84.87 | 97.08 | [config](convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-pre_3rdparty_in1k_20230104-00a70fa4.pth) |
| `convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k`\* | FCMAE ImageNet-21k | 88.72 | 15.38 | 86.74 | 98.02 | [config](convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k_20230104-c48d16a5.pth) |
| `convnext-v2-large_fcmae-pre_3rdparty_in1k`\* | FCMAE | 197.96 | 34.40 | 85.76 | 97.59 | [config](convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-pre_3rdparty_in1k_20230104-ef393013.pth) |
| `convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k`\* | FCMAE ImageNet-21k | 197.96 | 34.40 | 87.26 | 98.24 | [config](convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k_20230104-d9c4dc0c.pth) |
| `convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px`\* | FCMAE ImageNet-21k | 88.72 | 45.21 | 87.63 | 98.42 | [config](convnext-v2-base_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-379425cc.pth) |
| `convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px`\* | FCMAE ImageNet-21k | 197.96 | 101.10 | 88.18 | 98.52 | [config](convnext-v2-large_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-9139a1f3.pth) |
| `convnext-v2-huge_fcmae-pre_3rdparty_in1k`\* | FCMAE | 660.29 | 115.00 | 86.25 | 97.75 | [config](convnext-v2-huge_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-pre_3rdparty_in1k_20230104-f795e5b8.pth) |
| `convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px`\* | FCMAE ImageNet-21k | 660.29 | 337.96 | 88.68 | 98.73 | [config](convnext-v2-huge_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-02a4eb35.pth) |
| `convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px`\* | FCMAE ImageNet-21k | 660.29 | 600.81 | 88.86 | 98.74 | [config](convnext-v2-huge_32xb32_in1k-512px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px_20230104-ce32e63c.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt-V2). The config files of these models are only for inference. We haven't reproduced the training results.*
## Citation
```bibtex
@article{Woo2023ConvNeXtV2,
title={ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders},
author={Sanghyun Woo and Shoubhik Debnath and Ronghang Hu and Xinlei Chen and Zhuang Liu and In So Kweon and Saining Xie},
year={2023},
journal={arXiv preprint arXiv:2301.00808},
}
```
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/atto.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr / weight decay; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 8e-4, 'weight_decay': 0.3},
    'clip_grad': None,
}

# learning policy: a single epoch-based cosine annealing scheduler
param_scheduler = [
    {'type': 'CosineAnnealingLR', 'eta_min': 1e-5, 'by_epoch': True},
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 600, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/base.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/base.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/femto.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr / weight decay; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 8e-4, 'weight_decay': 0.3},
    'clip_grad': None,
}

# learning policy: a single epoch-based cosine annealing scheduler
param_scheduler = [
    {'type': 'CosineAnnealingLR', 'eta_min': 1e-5, 'by_epoch': True},
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 600, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/huge.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/huge.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting
# Training pipeline: load, random-resized crop to scale 512 (pillow backend,
# bicubic interpolation), horizontal flip with probability 0.5, pack inputs.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'RandomResizedCrop',
        'scale': 512,
        'backend': 'pillow',
        'interpolation': 'bicubic',
    },
    {'type': 'RandomFlip', 'prob': 0.5, 'direction': 'horizontal'},
    {'type': 'PackInputs'},
]

# Test pipeline: load, resize to scale 512 (pillow backend, bicubic
# interpolation), pack inputs. Shared by the val and test dataloaders below.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'Resize',
        'scale': 512,
        'backend': 'pillow',
        'interpolation': 'bicubic',
    },
    {'type': 'PackInputs'},
]

# Plug the pipelines defined above into the three dataloaders.
train_dataloader = {
    'batch_size': 32,
    'dataset': {'pipeline': train_pipeline},
}
val_dataloader = {'dataset': {'pipeline': test_pipeline}}
test_dataloader = {'dataset': {'pipeline': test_pipeline}}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/huge.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
# Base files this config builds on (model, dataset, schedule, runtime).
# The assignments below are the settings specific to this config.
_base_ = [
    '../_base_/models/convnext_v2/large.py',
    '../_base_/datasets/imagenet_bs64_swin_384.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# dataset setting: batch size used by the train dataloader
train_dataloader = {'batch_size': 32}

# schedule setting: optimizer lr; `clip_grad` is set to None
optim_wrapper = {
    'optimizer': {'lr': 2.5e-3},
    'clip_grad': None,
}

# learning policy: linear warm-up followed by cosine annealing
param_scheduler = [
    # warm-up scheduler: ends at epoch 20 and is converted to
    # iteration-based updates
    {
        'type': 'LinearLR',
        'start_factor': 1e-3,
        'by_epoch': True,
        'end': 20,
        'convert_to_iter_based': True,
    },
    # main scheduler: begins at epoch 20, where the warm-up ends
    {
        'type': 'CosineAnnealingLR',
        'eta_min': 1e-5,
        'by_epoch': True,
        'begin': 20,
    },
]

# train, val, test setting
train_cfg = {'by_epoch': True, 'max_epochs': 100, 'val_interval': 1}

# runtime setting: EMA hook registered with ABOVE_NORMAL priority
custom_hooks = [
    {'type': 'EMAHook', 'momentum': 1e-4, 'priority': 'ABOVE_NORMAL'},
]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment