_base_ = [
'../../_base_/models/mixmim/mixmim_base.py',
'../../_base_/datasets/imagenet_bs64_swin_224.py',
'../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'ImageNet'
data_root = 'data/imagenet/'
data_preprocessor = dict(
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True,
)
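# The pipeline below runs before the preprocessor's BGR-to-RGB conversion,
# so augmentation fill values are given in BGR order (reversed mean/std).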
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackInputs'),
]
train_dataloader = dict(
batch_size=128,
num_workers=16,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
val_dataloader = dict(
batch_size=64,
num_workers=8,
pin_memory=True,
collate_fn=dict(type='default_collate'),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
test_dataloader = val_dataloader
model = dict(
backbone=dict(
init_cfg=dict(type='Pretrained', checkpoint='', prefix='backbone.')))
# optimizer
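# Linear LR scaling: base LR 5e-4 for a 256-sample batch, scaled to the
# effective batch size of 8 GPUs x 128 samples per GPU.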
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='AdamW',
lr=5e-4 * (8 * 128 / 256),
betas=(0.9, 0.999),
weight_decay=0.05),
constructor='LearningRateDecayOptimWrapperConstructor',
paramwise_cfg=dict(
layer_decay_rate=0.7,
custom_keys={
'.ln': dict(decay_mult=0.0), # do not decay on ln and bias
'.bias': dict(decay_mult=0.0)
}))
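# With the layer-wise decay constructor, layer_decay_rate=0.7 roughly gives
# layer i an LR of lr * 0.7 ** (num_layers - i), so layers closer to the
# input are updated more conservatively during fine-tuning.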
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-6,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=95,
eta_min=1e-6,
by_epoch=True,
begin=5,
end=100,
convert_to_iter_based=True)
]
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=10)
val_cfg = dict()
test_cfg = dict()
default_hooks = dict(
# save checkpoint per epoch.
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=1))
_base_ = [
'../../_base_/models/mixmim/mixmim_base.py',
'../../_base_/datasets/imagenet_bs64_swin_224.py',
'../../_base_/schedules/imagenet_bs256.py',
'../../_base_/default_runtime.py'
]
Collections:
- Name: MixMIM
Metadata:
Architecture:
- Attention Dropout
- Convolution
- Dense Connections
- Dropout
- GELU
- Layer Normalization
- Multi-Head Attention
- Scaled Dot-Product Attention
- Tanh Activation
Paper:
Title: 'MixMIM: Mixed and Masked Image Modeling for Efficient Visual Representation Learning'
URL: https://arxiv.org/abs/2205.13137
README: configs/mixmim/README.md
Code:
URL: https://github.com/open-mmlab/mmpretrain/blob/main/mmpretrain/models/backbones/mixmim.py
Version: v1.0.0rc4
Models:
- Name: mixmim_mixmim-base_16xb128-coslr-300e_in1k
Metadata:
Epochs: 300
Batch Size: 2048
FLOPs: 16351906816
Parameters: 114665784
Training Data: ImageNet-1k
In Collection: MixMIM
Results: null
Weights: https://download.openmmlab.com/mmselfsup/1.x/mixmim/mixmim-base-p16_16xb128-coslr-300e_in1k/mixmim-base-p16_16xb128-coslr-300e_in1k_20221208-44fe8d2c.pth
Config: configs/mixmim/mixmim_mixmim-base_16xb128-coslr-300e_in1k.py
Downstream:
- mixmim-base_mixmim-pre_8xb128-coslr-100e_in1k
- Name: mixmim-base_mixmim-pre_8xb128-coslr-100e_in1k
Metadata:
Epochs: 100
Batch Size: 1024
FLOPs: 16351906816
Parameters: 88344352
Training Data: ImageNet-1k
In Collection: MixMIM
Results:
- Task: Image Classification
Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.63
Weights: https://download.openmmlab.com/mmselfsup/1.x/mixmim/mixmim-base-p16_16xb128-coslr-300e_in1k/mixmim-base-p16_ft-8xb128-coslr-100e_in1k/mixmim-base-p16_ft-8xb128-coslr-100e_in1k_20221208-41ecada9.pth
Config: configs/mixmim/benchmarks/mixmim-base_8xb128-coslr-100e_in1k.py
_base_ = '../_base_/default_runtime.py'
# dataset settings
dataset_type = 'ImageNet'
data_root = 'data/imagenet/'
data_preprocessor = dict(
type='SelfSupDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
crop_ratio_range=(0.2, 1.0),
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5),
dict(type='PackInputs')
]
train_dataloader = dict(
batch_size=128,
num_workers=8,
persistent_workers=True,
pin_memory=True,
sampler=dict(type='DefaultSampler', shuffle=True),
collate_fn=dict(type='default_collate'),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='meta/train.txt',
data_prefix=dict(img_path='train/'),
pipeline=train_pipeline))
# model settings
model = dict(
type='MixMIM',
backbone=dict(
type='MixMIMPretrainTransformer',
arch='B',
drop_rate=0.0,
drop_path_rate=0.0, # drop_path_rate=0.0 during pretraining
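# mask_ratio=0.5: MixMIM mixes two images with complementary masks, so each
# image contributes half of the visible tokens.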
mask_ratio=0.5),
neck=dict(
type='MixMIMPretrainDecoder',
num_patches=49,
encoder_stride=32,
embed_dim=1024,
decoder_embed_dim=512,
decoder_depth=8,
decoder_num_heads=16),
head=dict(
type='MixMIMPretrainHead',
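# norm_pix=True: reconstruction targets are per-patch normalized pixels,
# compared with an L2 loss (MAE-style).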
norm_pix=True,
loss=dict(type='PixelReconstructionLoss', criterion='L2')))
# optimizer wrapper
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='AdamW',
lr=1.5e-4 * (2048 / 256),
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0)
}))
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=260,
by_epoch=True,
begin=40,
end=300,
convert_to_iter_based=True)
]
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=300)
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=1))
randomness = dict(seed=0, diff_rank_seed=True)
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
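# When enabled (e.g. via the --auto-scale-lr flag of tools/train.py), the
# optimizer LR is multiplied by (actual total batch size / base_batch_size).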
# MLP-Mixer
> [MLP-Mixer: An all-MLP Architecture for Vision](https://arxiv.org/abs/2105.01601)
<!-- [ALGORITHM] -->
## Abstract
Convolutional Neural Networks (CNNs) are the go-to model for computer vision. Recently, attention-based networks, such as the Vision Transformer, have also become popular. In this paper we show that while convolutions and attention are both sufficient for good performance, neither of them are necessary. We present MLP-Mixer, an architecture based exclusively on multi-layer perceptrons (MLPs). MLP-Mixer contains two types of layers: one with MLPs applied independently to image patches (i.e. "mixing" the per-location features), and one with MLPs applied across patches (i.e. "mixing" spatial information). When trained on large datasets, or with modern regularization schemes, MLP-Mixer attains competitive scores on image classification benchmarks, with pre-training and inference cost comparable to state-of-the-art models. We hope that these results spark further research beyond the realms of well established CNNs and Transformers.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/143178327-7118b48a-5f5f-4844-a614-a571917384ca.png" width="90%"/>
</div>
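To make the two kinds of mixing concrete, here is a minimal PyTorch sketch of one Mixer block (an illustration written for this README, not the mmpretrain backbone code; the class and argument names are made up for the example): the token-mixing MLP operates across patches, and the channel-mixing MLP operates within each patch.

```python
import torch
from torch import nn


class MixerBlock(nn.Module):
    """One Mixer block: token-mixing MLP followed by channel-mixing MLP."""

    def __init__(self, num_patches, embed_dims, tokens_mlp_dims, channels_mlp_dims):
        super().__init__()
        self.norm1 = nn.LayerNorm(embed_dims)
        self.token_mlp = nn.Sequential(
            nn.Linear(num_patches, tokens_mlp_dims), nn.GELU(),
            nn.Linear(tokens_mlp_dims, num_patches))
        self.norm2 = nn.LayerNorm(embed_dims)
        self.channel_mlp = nn.Sequential(
            nn.Linear(embed_dims, channels_mlp_dims), nn.GELU(),
            nn.Linear(channels_mlp_dims, embed_dims))

    def forward(self, x):
        # x: (batch, num_patches, embed_dims)
        # Token mixing: transpose so the MLP acts along the patch axis.
        x = x + self.token_mlp(self.norm1(x).transpose(1, 2)).transpose(1, 2)
        # Channel mixing: the MLP acts along the feature axis of each patch.
        x = x + self.channel_mlp(self.norm2(x))
        return x


# Example with Mixer-Base-like sizes: 196 patches, 768-dim embeddings.
block = MixerBlock(196, 768, 384, 3072)
print(block(torch.rand(2, 196, 768)).shape)  # torch.Size([2, 196, 768])
```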
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('mlp-mixer-base-p16_3rdparty_64xb64_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('mlp-mixer-base-p16_3rdparty_64xb64_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Test:
```shell
python tools/test.py configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-base-p16_3rdparty_64xb64_in1k_20211124-1377e3e0.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------------------------- | :----------: | :--------: | :-------: | :-------: | :-------: | :------------------------------------------: | :-------------------------------------------------------------: |
| `mlp-mixer-base-p16_3rdparty_64xb64_in1k`\* | From scratch | 59.88 | 12.61 | 76.68 | 92.25 | [config](mlp-mixer-base-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-base-p16_3rdparty_64xb64_in1k_20211124-1377e3e0.pth) |
| `mlp-mixer-large-p16_3rdparty_64xb64_in1k`\* | From scratch | 208.20 | 44.57 | 72.34 | 88.02 | [config](mlp-mixer-large-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-large-p16_3rdparty_64xb64_in1k_20211124-5a2519d2.pth) |
*Models with * are converted from the [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py) repository. The config files of these models are only for inference. We haven't reproduced the training results.*
## Citation
```bibtex
@misc{tolstikhin2021mlpmixer,
title={MLP-Mixer: An all-MLP Architecture for Vision},
author={Ilya Tolstikhin and Neil Houlsby and Alexander Kolesnikov and Lucas Beyer and Xiaohua Zhai and Thomas Unterthiner and Jessica Yung and Andreas Steiner and Daniel Keysers and Jakob Uszkoreit and Mario Lucic and Alexey Dosovitskiy},
year={2021},
eprint={2105.01601},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
Collections:
- Name: MLP-Mixer
Metadata:
Training Data: ImageNet-1k
Architecture:
- MLP
- Layer Normalization
- Dropout
Paper:
URL: https://arxiv.org/abs/2105.01601
Title: "MLP-Mixer: An all-MLP Architecture for Vision"
README: configs/mlp_mixer/README.md
Code:
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.18.0/mmcls/models/backbones/mlp_mixer.py
Version: v0.18.0
Models:
- Name: mlp-mixer-base-p16_3rdparty_64xb64_in1k
In Collection: MLP-Mixer
Config: configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py
Metadata:
FLOPs: 12610000000 # 12.61 G
Parameters: 59880000 # 59.88 M
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 76.68
Top 5 Accuracy: 92.25
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-base-p16_3rdparty_64xb64_in1k_20211124-1377e3e0.pth
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224-76587d61.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py#L70
- Name: mlp-mixer-large-p16_3rdparty_64xb64_in1k
In Collection: MLP-Mixer
Config: configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py
Metadata:
FLOPs: 44570000000 # 44.57 G
Parameters: 208200000 # 208.2 M
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 72.34
Top 5 Accuracy: 88.02
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-large-p16_3rdparty_64xb64_in1k_20211124-5a2519d2.pth
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224_in21k-617b3de2.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py#L73
_base_ = [
'../_base_/models/mlp_mixer_base_patch16.py',
'../_base_/datasets/imagenet_bs64_mixer_224.py',
'../_base_/schedules/imagenet_bs4096_AdamW.py',
'../_base_/default_runtime.py',
]
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
_base_ = [
'../_base_/models/mlp_mixer_large_patch16.py',
'../_base_/datasets/imagenet_bs64_mixer_224.py',
'../_base_/schedules/imagenet_bs4096_AdamW.py',
'../_base_/default_runtime.py',
]
optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
# MobileNet V2
> [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)
<!-- [ALGORITHM] -->
## Introduction
**MobileNet V2** was first described in [the paper](https://arxiv.org/pdf/1801.04381.pdf), which improves the state-of-the-art performance of mobile models on multiple tasks. MobileNetV2 builds on V1 with two new ideas, linear bottlenecks and inverted residuals: each residual block takes thin bottleneck layers as its input and output, while the intermediate expansion layer uses lightweight depthwise convolutions to filter features and serves as the source of non-linearity. The authors evaluate MobileNet V2 on ImageNet classification, COCO object detection, and VOC image segmentation.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/142563365-7a9ea577-8f79-4c21-a750-ebcaad9bcc2f.png" width="60%"/>
</div>
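The following is a minimal PyTorch sketch of one inverted residual block (an illustration, not the mmpretrain `MobileNetV2` backbone implementation; the names and default `expand_ratio` are only for the example): a 1x1 expansion, a 3x3 depthwise convolution, and a linear 1x1 projection back to a thin bottleneck, with a shortcut when the shapes match.

```python
import torch
from torch import nn


class InvertedResidual(nn.Module):
    """Expansion -> depthwise conv -> linear projection, with optional shortcut."""

    def __init__(self, in_channels, out_channels, stride=1, expand_ratio=6):
        super().__init__()
        hidden = in_channels * expand_ratio
        self.use_shortcut = stride == 1 and in_channels == out_channels
        self.block = nn.Sequential(
            # 1x1 pointwise expansion to a wider representation.
            nn.Conv2d(in_channels, hidden, 1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            # 3x3 depthwise convolution (one filter per channel).
            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            # Linear 1x1 projection back to a thin bottleneck (no activation).
            nn.Conv2d(hidden, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels))

    def forward(self, x):
        out = self.block(x)
        return x + out if self.use_shortcut else out


block = InvertedResidual(32, 32, stride=1)
print(block(torch.rand(1, 32, 56, 56)).shape)  # torch.Size([1, 32, 56, 56])
```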
## Abstract
<details>
<summary>Show the paper's abstract</summary>
<br>
In this paper we describe a new mobile architecture, MobileNetV2, that improves the state of the art performance of mobile models on multiple tasks and benchmarks as well as across a spectrum of different model sizes. We also describe efficient ways of applying these mobile models to object detection in a novel framework we call SSDLite. Additionally, we demonstrate how to build mobile semantic segmentation models through a reduced form of DeepLabv3 which we call Mobile DeepLabv3.
The MobileNetV2 architecture is based on an inverted residual structure where the input and output of the residual block are thin bottleneck layers, opposite to traditional residual models which use expanded representations in the input. MobileNetV2 uses lightweight depthwise convolutions to filter features in the intermediate expansion layer. Additionally, we find that it is important to remove non-linearities in the narrow layers in order to maintain representational power. We demonstrate that this improves performance and provide an intuition that led to this design. Finally, our approach allows decoupling of the input/output domains from the expressiveness of the transformation, which provides a convenient framework for further analysis. We measure our performance on ImageNet classification, COCO object detection, and VOC image segmentation. We evaluate the trade-offs between accuracy and number of operations measured by multiply-adds (MAdd), as well as the number of parameters.
</br>
</details>
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('mobilenet-v2_8xb32_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('mobilenet-v2_8xb32_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py
```
Test:
```shell
python tools/test.py configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------ | :----------: | :--------: | :-------: | :-------: | :-------: | :----------------------------------: | :----------------------------------------------------------------------------------------: |
| `mobilenet-v2_8xb32_in1k` | From scratch | 3.50 | 0.32 | 71.86 | 90.42 | [config](mobilenet-v2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.json) |
## Citation
```bibtex
@INPROCEEDINGS{8578572,
author={M. {Sandler} and A. {Howard} and M. {Zhu} and A. {Zhmoginov} and L. {Chen}},
booktitle={2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
year={2018},
volume={},
number={},
pages={4510-4520},
doi={10.1109/CVPR.2018.00474}
}
```
Collections:
- Name: MobileNet V2
Metadata:
Training Data: ImageNet-1k
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x V100 GPUs
Epochs: 300
Batch Size: 256
Architecture:
- MobileNet V2
Paper:
URL: https://arxiv.org/abs/1801.04381
Title: "MobileNetV2: Inverted Residuals and Linear Bottlenecks"
README: configs/mobilenet_v2/README.md
Code:
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.15.0/mmcls/models/backbones/mobilenet_v2.py#L101
Version: v0.15.0
Models:
- Name: mobilenet-v2_8xb32_in1k
Metadata:
FLOPs: 319000000
Parameters: 3500000
In Collection: MobileNet V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.86
Top 5 Accuracy: 90.42
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth
Config: configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py
_base_ = [
'../_base_/models/mobilenet_v2_1x.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256_epochstep.py',
'../_base_/default_runtime.py'
]
# MobileNet V3
> [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244)
<!-- [ALGORITHM] -->
## Introduction
**MobileNet V3** was first described in [the paper](https://arxiv.org/pdf/1905.02244.pdf). Its architecture is found by network architecture search (NAS), inherits the practical building blocks of V1 and V2, and adds squeeze-and-excitation (SE) channel attention. The authors release two new models, MobileNetV3-Large and MobileNetV3-Small, targeted at high- and low-resource use cases; these models are then adapted and applied to object detection and semantic segmentation. The authors evaluate MobileNet V3 on ImageNet classification, COCO object detection, and Cityscapes segmentation.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/142563801-ef4feacc-ecd7-4d14-a411-8c9d63571749.png" width="60%"/>
</div>
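As a small illustration of the SE channel attention mentioned above, here is a rough PyTorch sketch of a squeeze-and-excitation block as used inside MobileNetV3 bottlenecks (not the mmpretrain implementation; the class name and reduction factor are assumptions for the example).

```python
import torch
from torch import nn


class SqueezeExcite(nn.Module):
    """Channel attention: squeeze by global pooling, excite with a small MLP."""

    def __init__(self, channels, reduction=4):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels),
            nn.Hardsigmoid())  # MobileNetV3 uses a hard sigmoid gate

    def forward(self, x):
        b, c, _, _ = x.shape
        # Per-channel weights in [0, 1], broadcast back over the spatial dims.
        weight = self.fc(self.pool(x).flatten(1)).view(b, c, 1, 1)
        return x * weight


se = SqueezeExcite(64)
print(se(torch.rand(1, 64, 28, 28)).shape)  # torch.Size([1, 64, 28, 28])
```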
## Abstract
<details>
<summary>Show the paper's abstract</summary>
<br>
We present the next generation of MobileNets based on a combination of complementary search techniques as well as a novel architecture design. MobileNetV3 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm and then subsequently improved through novel architecture advances. This paper starts the exploration of how automated search algorithms and network design can work together to harness complementary approaches improving the overall state of the art. Through this process we create two new MobileNet models for release: MobileNetV3-Large and MobileNetV3-Small which are targeted for high and low resource use cases. These models are then adapted and applied to the tasks of object detection and semantic segmentation. For the task of semantic segmentation (or any dense pixel prediction), we propose a new efficient segmentation decoder Lite Reduced Atrous Spatial Pyramid Pooling (LR-ASPP). We achieve new state of the art results for mobile classification, detection and segmentation. MobileNetV3-Large is 3.2% more accurate on ImageNet classification while reducing latency by 15% compared to MobileNetV2. MobileNetV3-Small is 4.6% more accurate while reducing latency by 5% compared to MobileNetV2. MobileNetV3-Large detection is 25% faster at roughly the same accuracy as MobileNetV2 on COCO detection. MobileNetV3-Large LR-ASPP is 30% faster than MobileNetV2 R-ASPP at similar accuracy for Cityscapes segmentation.
</br>
</details>
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('mobilenet-v3-small-050_3rdparty_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('mobilenet-v3-small-050_3rdparty_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/mobilenet_v3/mobilenet-v3-small_8xb128_in1k.py
```
Test:
```shell
python tools/test.py configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small-050_3rdparty_in1k_20221114-e0b86be1.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :--------------------------------------- | :----------: | :--------: | :-------: | :-------: | :-------: | :---------------------------------------------: | :--------------------------------------------------------------: |
| `mobilenet-v3-small-050_3rdparty_in1k`\* | From scratch | 1.59 | 0.02 | 57.91 | 80.19 | [config](mobilenet-v3-small-050_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small-050_3rdparty_in1k_20221114-e0b86be1.pth) |
| `mobilenet-v3-small-075_3rdparty_in1k`\* | From scratch | 2.04 | 0.04 | 65.23 | 85.44 | [config](mobilenet-v3-small-075_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small-075_3rdparty_in1k_20221114-2011fa76.pth) |
| `mobilenet-v3-small_8xb128_in1k` | From scratch | 2.54 | 0.06 | 66.68 | 86.74 | [config](mobilenet-v3-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small_8xb128_in1k_20221114-bd1bfcde.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small_8xb128_in1k_20221114-bd1bfcde.json) |
| `mobilenet-v3-small_3rdparty_in1k`\* | From scratch | 2.54 | 0.06 | 67.66 | 87.41 | [config](mobilenet-v3-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth) |
| `mobilenet-v3-large_8xb128_in1k` | From scratch | 5.48 | 0.23 | 73.49 | 91.31 | [config](mobilenet-v3-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-large_8xb128_in1k_20221114-0ed9ed9a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-large_8xb128_in1k_20221114-0ed9ed9a.json) |
| `mobilenet-v3-large_3rdparty_in1k`\* | From scratch | 5.48 | 0.23 | 74.04 | 91.34 | [config](mobilenet-v3-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth) |
*Models with * are converted from the [official repo](https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py). The config files of these models are only for inference. We haven't reproduced the training results.*
## Citation
```bibtex
@inproceedings{Howard_2019_ICCV,
author = {Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and Le, Quoc V. and Adam, Hartwig},
title = {Searching for MobileNetV3},
booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
month = {October},
year = {2019}
}
```
Collections:
- Name: MobileNet V3
Metadata:
Training Data: ImageNet-1k
Training Techniques:
- RMSprop with Momentum
- Weight Decay
Training Resources: 8x V100 GPUs
Epochs: 600
Batch Size: 1024
Architecture:
- MobileNet V3
Paper:
URL: https://arxiv.org/abs/1905.02244
Title: Searching for MobileNetV3
README: configs/mobilenet_v3/README.md
Code:
URL: https://github.com/open-mmlab/mmpretrain/blob/v0.15.0/mmcls/models/backbones/mobilenet_v3.py
Version: v0.15.0
Models:
- Name: mobilenet-v3-small-050_3rdparty_in1k
Metadata:
FLOPs: 24895000
Parameters: 1590000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 57.91
Top 5 Accuracy: 80.19
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small-050_3rdparty_in1k_20221114-e0b86be1.pth
Config: configs/mobilenet_v3/mobilenet-v3-small-050_8xb128_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_050_lambc-4b7bbe87.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/mobilenetv3.py
- Name: mobilenet-v3-small-075_3rdparty_in1k
Metadata:
FLOPs: 44791000
Parameters: 2040000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 65.23
Top 5 Accuracy: 85.44
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small-075_3rdparty_in1k_20221114-2011fa76.pth
Config: configs/mobilenet_v3/mobilenet-v3-small-075_8xb128_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_075_lambc-384766db.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/mobilenetv3.py
- Name: mobilenet-v3-small_8xb128_in1k
Metadata:
FLOPs: 60000000
Parameters: 2540000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 66.68
Top 5 Accuracy: 86.74
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-small_8xb128_in1k_20221114-bd1bfcde.pth
Config: configs/mobilenet_v3/mobilenet-v3-small_8xb128_in1k.py
- Name: mobilenet-v3-small_3rdparty_in1k
Metadata:
FLOPs: 60000000
Parameters: 2540000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 67.66
Top 5 Accuracy: 87.41
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth
Config: configs/mobilenet_v3/mobilenet-v3-small_8xb128_in1k.py
Converted From:
Weights: https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth
Code: https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py
- Name: mobilenet-v3-large_8xb128_in1k
Metadata:
FLOPs: 230000000
Parameters: 5480000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 73.49
Top 5 Accuracy: 91.31
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/mobilenet-v3-large_8xb128_in1k_20221114-0ed9ed9a.pth
Config: configs/mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py
- Name: mobilenet-v3-large_3rdparty_in1k
Metadata:
FLOPs: 230000000
Parameters: 5480000
In Collection: MobileNet V3
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 74.04
Top 5 Accuracy: 91.34
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth
Config: configs/mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py
Converted From:
Weights: https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth
Code: https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py
# Refers to https://pytorch.org/blog/ml-models-torchvision-v0.9/#classification
_base_ = [
'../_base_/models/mobilenet_v3/mobilenet_v3_large_imagenet.py',
'../_base_/datasets/imagenet_bs128_mbv3.py',
'../_base_/default_runtime.py',
]
# schedule settings
optim_wrapper = dict(
optimizer=dict(
type='RMSprop',
lr=0.064,
alpha=0.9,
momentum=0.9,
eps=0.0316,
weight_decay=1e-5))
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (8 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=1024)
_base_ = [
'../_base_/models/mobilenet_v3/mobilenet_v3_small_050_imagenet.py',
'../_base_/datasets/imagenet_bs128_mbv3.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(backbone=dict(norm_cfg=dict(type='BN', eps=1e-5, momentum=0.1)))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='AutoAugment',
policies='imagenet',
hparams=dict(pad_val=[round(x) for x in [103.53, 116.28, 123.675]])),
dict(
type='RandomErasing',
erase_prob=0.2,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=[103.53, 116.28, 123.675],
fill_std=[57.375, 57.12, 58.395]),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# If you want the standard test setting, please configure the test dataset manually
test_dataloader = val_dataloader
# schedule settings
optim_wrapper = dict(
optimizer=dict(
type='RMSprop',
lr=0.064,
alpha=0.9,
momentum=0.9,
eps=0.0316,
weight_decay=1e-5))
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=10)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (8 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=1024)
_base_ = [
'../_base_/models/mobilenet_v3/mobilenet_v3_small_075_imagenet.py',
'../_base_/datasets/imagenet_bs128_mbv3.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(backbone=dict(norm_cfg=dict(type='BN', eps=1e-5, momentum=0.1)))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='AutoAugment',
policies='imagenet',
hparams=dict(pad_val=[round(x) for x in [103.53, 116.28, 123.675]])),
dict(
type='RandomErasing',
erase_prob=0.2,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=[103.53, 116.28, 123.675],
fill_std=[57.375, 57.12, 58.395]),
dict(type='PackInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# schedule settings
optim_wrapper = dict(
optimizer=dict(
type='RMSprop',
lr=0.064,
alpha=0.9,
momentum=0.9,
eps=0.0316,
weight_decay=1e-5))
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=10)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (8 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=1024)
# Refers to https://pytorch.org/blog/ml-models-torchvision-v0.9/#classification
_base_ = [
'../_base_/models/mobilenet_v3/mobilenet_v3_small_imagenet.py',
'../_base_/datasets/imagenet_bs128_mbv3.py',
'../_base_/default_runtime.py',
]
# schedule settings
optim_wrapper = dict(
optimizer=dict(
type='RMSprop',
lr=0.064,
alpha=0.9,
momentum=0.9,
eps=0.0316,
weight_decay=1e-5))
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (8 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=1024)
_base_ = [
'../_base_/models/mobilenet_v3/mobilenet_v3_small_cifar.py',
'../_base_/datasets/cifar10_bs16.py',
'../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='MultiStepLR',
by_epoch=True,
milestones=[120, 170],
gamma=0.1,
)
train_cfg = dict(by_epoch=True, max_epochs=200)
# MobileOne
> [An Improved One millisecond Mobile Backbone](https://arxiv.org/abs/2206.04040)
<!-- [ALGORITHM] -->
## Introduction
MobileOne is proposed by Apple and is based on structural reparameterization. On Apple chips, the model reaches roughly 76% top-1 accuracy on ImageNet with an inference latency under 1 ms. Its main improvements over [RepVGG](../repvgg) are the following (a fused conv+BN sketch follows the figure below):
- Reparameterization with depthwise and pointwise convolutions instead of standard convolutions.
- Removal of the residual structure, which is unfriendly to memory access.
<div align=center>
<img src="https://user-images.githubusercontent.com/18586273/183552452-74657532-f461-48f7-9aa7-c23f006cdb07.png" width="40%"/>
</div>
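The core of reparameterization is that training-time branches can be folded into a single convolution for inference. Below is a minimal sketch of the basic step, fusing a `Conv2d` + `BatchNorm2d` pair into one convolution (an illustration under simplified assumptions, not the mmpretrain MobileOne code, which also merges the parallel depthwise/pointwise branches).

```python
import torch
from torch import nn


def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold a BatchNorm's affine transform into the preceding convolution."""
    fused = nn.Conv2d(
        conv.in_channels, conv.out_channels, conv.kernel_size,
        stride=conv.stride, padding=conv.padding, groups=conv.groups, bias=True)
    std = (bn.running_var + bn.eps).sqrt()
    scale = bn.weight / std
    # Scale each output filter and shift the bias accordingly.
    fused.weight.data = conv.weight * scale.reshape(-1, 1, 1, 1)
    conv_bias = conv.bias if conv.bias is not None else torch.zeros(conv.out_channels)
    fused.bias.data = bn.bias + (conv_bias - bn.running_mean) * scale
    return fused


conv = nn.Conv2d(8, 16, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(16).eval()
x = torch.rand(1, 8, 32, 32)
fused = fuse_conv_bn(conv, bn)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-6))  # True
```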
## Abstract
<details>
<summary>Show the paper's abstract</summary>
<br>
Efficient neural network backbones for mobile devices are often optimized for metrics such as FLOPs or parameter count. However, these metrics may not correlate well with latency of the network when deployed on a mobile device. Therefore, we perform extensive analysis of different metrics by deploying several mobile-friendly networks on a mobile device. We identify and analyze architectural and optimization bottlenecks in recent efficient neural networks and provide ways to mitigate these bottlenecks. To this end, we design an efficient backbone MobileOne, with variants achieving an inference time under 1 ms on an iPhone12 with 75.9% top-1 accuracy on ImageNet. We show that MobileOne achieves state-of-the-art performance within the efficient architectures while being many times faster on mobile. Our best model obtains similar performance on ImageNet as MobileFormer while being 38x faster. Our model obtains 2.3% better top-1 accuracy on ImageNet than EfficientNet at similar latency. Furthermore, we show that our model generalizes to multiple tasks - image classification, object detection, and semantic segmentation with significant improvements in latency and accuracy as compared to existing efficient architectures when deployed on a mobile device.
</br>
</details>
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model
predict = inference_model('mobileone-s0_8xb32_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model
model = get_model('mobileone-s0_8xb32_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))
# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/mobileone/mobileone-s0_8xb32_in1k.py
```
Test:
```shell
python tools/test.py configs/mobileone/mobileone-s0_8xb32_in1k.py https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------ | :----------: | :--------: | :-------: | :-------: | :-------: | :----------------------------------: | :----------------------------------------------------------------------------------------: |
| `mobileone-s0_8xb32_in1k` | From scratch | 2.08 | 0.27 | 71.34 | 89.87 | [config](mobileone-s0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.json) |
| `mobileone-s1_8xb32_in1k` | From scratch | 4.76 | 0.82 | 75.72 | 92.54 | [config](mobileone-s1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s1_8xb32_in1k_20221110-ceeef467.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s1_8xb32_in1k_20221110-ceeef467.json) |
| `mobileone-s2_8xb32_in1k` | From scratch | 7.81 | 1.30 | 77.37 | 93.34 | [config](mobileone-s2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s2_8xb32_in1k_20221110-9c7ecb97.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s2_8xb32_in1k_20221110-9c7ecb97.json) |
| `mobileone-s3_8xb32_in1k` | From scratch | 10.08 | 1.89 | 78.06 | 93.83 | [config](mobileone-s3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s3_8xb32_in1k_20221110-c95eb3bf.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s3_8xb32_in1k_20221110-c95eb3bf.json) |
| `mobileone-s4_8xb32_in1k` | From scratch | 14.84 | 2.98 | 79.69 | 94.46 | [config](mobileone-s4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s4_8xb32_in1k_20221110-28d888cb.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s4_8xb32_in1k_20221110-28d888cb.json) |
## Citation
```bibtex
@article{mobileone2022,
title={An Improved One millisecond Mobile Backbone},
author={Vasu, Pavan Kumar Anasosalu and Gabriel, James and Zhu, Jeff and Tuzel, Oncel and Ranjan, Anurag},
journal={arXiv preprint arXiv:2206.04040},
year={2022}
}
```
_base_ = ['../mobileone-s0_8xb32_in1k.py']
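# deploy=True builds the backbone in its reparameterized single-branch form
# for inference; the training-time parallel branches are fused away.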
model = dict(backbone=dict(deploy=True))