添加mmclassification-0.24.1代码，删除mmclassification-speed-benchmark

0fd8347d · unknown · cc567e9e · 0fd8347d · 0fd8347d · 0fd8347d
Commit 0fd8347d authored Jan 08, 2023 by unknown
20 changed files
--- a/openmmlab_test/mmclassification-0.24.1/configs/convnext/convnext-tiny_32xb128_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/convnext/convnext-tiny_32xb128_in1k.py
+_base_ = [  # base config files this config inherits from: model, dataset, schedule, runtime
+    '../_base_/models/convnext/convnext-tiny.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+data = dict(samples_per_gpu=128)  # per-GPU batch size; matches the "b128" in the file name
+
+optimizer = dict(lr=4e-3)  # sets only lr; the other optimizer fields presumably come from the base schedule - confirm
+
+custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]  # registers an EMAHook with momentum 4e-5
--- a/openmmlab_test/mmclassification-0.24.1/configs/convnext/convnext-xlarge_64xb64_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/convnext/convnext-xlarge_64xb64_in1k.py
+_base_ = [  # base config files this config inherits from: model, dataset, schedule, runtime
+    '../_base_/models/convnext/convnext-xlarge.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+data = dict(samples_per_gpu=64)  # per-GPU batch size; matches the "b64" in the file name
+
+optimizer = dict(lr=4e-3)  # sets only lr; the other optimizer fields presumably come from the base schedule - confirm
+
+custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]  # registers an EMAHook with momentum 4e-5
--- a/openmmlab_test/mmclassification-0.24.1/configs/convnext/metafile.yml
+++ b/openmmlab_test/mmclassification-0.24.1/configs/convnext/metafile.yml
+Collections:
+  - Name: ConvNeXt
+    Metadata:
+      Training Data: ImageNet-1k
+      Architecture:
+        - 1x1 Convolution
+        - LayerScale
+    Paper:
+      URL: https://arxiv.org/abs/2201.03545v1
+      Title: A ConvNet for the 2020s
+    README: configs/convnext/README.md
+    Code:
+      Version: v0.20.1
+      URL: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py
+
+Models:
+  - Name: convnext-tiny_3rdparty_32xb128_in1k
+    Metadata:
+      FLOPs: 4457472768
+      Parameters: 28589128
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 82.05
+          Top 5 Accuracy: 95.86
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128_in1k_20220124-18abde00.pth
+    Config: configs/convnext/convnext-tiny_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-tiny_3rdparty_32xb128-noema_in1k
+    Metadata:
+      Training Data: ImageNet-1k
+      FLOPs: 4457472768
+      Parameters: 28589128
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 81.81
+          Top 5 Accuracy: 95.67
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth
+    Config: configs/convnext/convnext-tiny_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-small_3rdparty_32xb128_in1k
+    Metadata:
+      FLOPs: 8687008512
+      Parameters: 50223688
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 83.13
+          Top 5 Accuracy: 96.44
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128_in1k_20220124-d39b5192.pth
+    Config: configs/convnext/convnext-small_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-small_3rdparty_32xb128-noema_in1k
+    Metadata:
+      Training Data: ImageNet-1k
+      FLOPs: 8687008512
+      Parameters: 50223688
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 83.11
+          Top 5 Accuracy: 96.34
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128-noema_in1k_20220222-fa001ca5.pth
+    Config: configs/convnext/convnext-small_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-base_3rdparty_32xb128_in1k
+    Metadata:
+      FLOPs: 15359124480
+      Parameters: 88591464
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 83.85
+          Top 5 Accuracy: 96.74
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth
+    Config: configs/convnext/convnext-base_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-base_3rdparty_32xb128-noema_in1k
+    Metadata:
+      Training Data: ImageNet-1k
+      FLOPs: 15359124480
+      Parameters: 88591464
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 83.71
+          Top 5 Accuracy: 96.60
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth
+    Config: configs/convnext/convnext-base_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-base_3rdparty_in21k
+    Metadata:
+      Training Data: ImageNet-21k
+      FLOPs: 15359124480
+      Parameters: 88591464
+    In Collections: ConvNeXt
+    Results: null
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-base_in21k-pre-3rdparty_32xb128_in1k
+    Metadata:
+      Training Data:
+        - ImageNet-21k
+        - ImageNet-1k
+      FLOPs: 15359124480
+      Parameters: 88591464
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 85.81
+          Top 5 Accuracy: 97.86
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth
+    Config: configs/convnext/convnext-base_32xb128_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_1k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-large_3rdparty_64xb64_in1k
+    Metadata:
+      FLOPs: 34368026112
+      Parameters: 197767336
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 84.30
+          Top 5 Accuracy: 96.89
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth
+    Config: configs/convnext/convnext-large_64xb64_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-large_3rdparty_in21k
+    Metadata:
+      Training Data: ImageNet-21k
+      FLOPs: 34368026112
+      Parameters: 197767336
+    In Collections: ConvNeXt
+    Results: null
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-large_in21k-pre-3rdparty_64xb64_in1k
+    Metadata:
+      Training Data:
+        - ImageNet-21k
+        - ImageNet-1k
+      FLOPs: 34368026112
+      Parameters: 197767336
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 86.61
+          Top 5 Accuracy: 98.04
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth
+    Config: configs/convnext/convnext-large_64xb64_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_1k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-xlarge_3rdparty_in21k
+    Metadata:
+      Training Data: ImageNet-21k
+      FLOPs: 60929820672
+      Parameters: 350196968
+    In Collections: ConvNeXt
+    Results: null
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
+  - Name: convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k
+    Metadata:
+      Training Data:
+        - ImageNet-21k
+        - ImageNet-1k
+      FLOPs: 60929820672
+      Parameters: 350196968
+    In Collections: ConvNeXt
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 86.97
+          Top 5 Accuracy: 98.20
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth
+    Config: configs/convnext/convnext-xlarge_64xb64_in1k.py
+    Converted From:
+      Weights: https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_1k_224_ema.pth
+      Code: https://github.com/facebookresearch/ConvNeXt
--- a/openmmlab_test/mmclassification-0.24.1/configs/cspnet/README.md
+++ b/openmmlab_test/mmclassification-0.24.1/configs/cspnet/README.md
+# CSPNet
+
+> [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929)
+
+<!-- [ALGORITHM] -->
+
+## Abstract
+
+<!-- [ABSTRACT] -->
+
+Neural networks have enabled state-of-the-art approaches to achieve incredible results on computer vision tasks such as object detection. However, such success greatly relies on costly computation resources, which hinders people with cheap devices from appreciating the advanced technology. In this paper, we propose Cross Stage Partial Network (CSPNet) to mitigate the problem that previous works require heavy inference computations from the network architecture perspective. We attribute the problem to the duplicate gradient information within network optimization. The proposed networks respect the variability of the gradients by integrating feature maps from the beginning and the end of a network stage, which, in our experiments, reduces computations by 20% with equivalent or even superior accuracy on the ImageNet dataset, and significantly outperforms state-of-the-art approaches in terms of AP50 on the MS COCO object detection dataset. The CSPNet is easy to implement and general enough to cope with architectures based on ResNet, ResNeXt, and DenseNet. Source code is at https://github.com/WongKinYiu/CrossStagePartialNetworks.
+
+<!-- [IMAGE] -->
+
+<div align=center>
+<img src="https://user-images.githubusercontent.com/18586273/159420842-6147c687-a488-460c-8bb2-4ea5276c26c7.png" width="60%"/>
+</div>
+
+## Results and models
+
+### ImageNet-1k
+
+|     Model      |   Pretrain   | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) |                                Config                                |                                Download                                 |
+| :------------: | :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------------: | :---------------------------------------------------------------------: |
+| CSPDarkNet50\* | From scratch |   27.64   |   5.04   |   80.05   |   95.07   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspdarknet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth) |
+| CSPResNet50\*  | From scratch |   21.62   |   3.48   |   79.55   |   94.68   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth) |
+| CSPResNeXt50\* | From scratch |   20.57   |   3.11   |   79.96   |   94.96   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspresnext50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth) |
+
+*Models with \* are converted from the [timm repo](https://github.com/rwightman/pytorch-image-models). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
+
+## Citation
+
+```bibtex
+@inproceedings{wang2020cspnet,
+  title={CSPNet: A new backbone that can enhance learning capability of CNN},
+  author={Wang, Chien-Yao and Liao, Hong-Yuan Mark and Wu, Yueh-Hua and Chen, Ping-Yang and Hsieh, Jun-Wei and Yeh, I-Hau},
+  booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops},
+  pages={390--391},
+  year={2020}
+}
+```
--- a/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspdarknet50_8xb32_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspdarknet50_8xb32_in1k.py
+_base_ = [  # only schedule and runtime bases are listed; model and dataset are defined in this file
+    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(type='CSPDarkNet', depth=53),  # NOTE: depth is 53 although the file name says "50"
+    neck=dict(type='GlobalAveragePooling'),
+    head=dict(
+        type='LinearClsHead',
+        num_classes=1000,
+        in_channels=1024,  # presumably the backbone's output channel count - confirm against CSPDarkNet
+        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
+        topk=(1, 5),  # top-1 and top-5
+    ))
+
+# dataset settings
+dataset_type = 'ImageNet'
+img_norm_cfg = dict(  # per-channel mean/std given on the 0-255 pixel scale
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # load -> random resized crop (224) -> random horizontal flip -> normalize -> tensors
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='RandomResizedCrop',
+        size=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # load -> resize (288, -1) -> center crop (256) -> normalize -> tensor
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='Resize',
+        size=(288, -1),  # -1 presumably keeps the aspect ratio - confirm against the Resize transform
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='CenterCrop', crop_size=256),  # NOTE: test crop (256) is larger than the training crop (224)
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+data = dict(
+    samples_per_gpu=32,  # per-GPU batch size; matches the "b32" in the file name
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/train',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline),
+    test=dict(  # currently uses the same paths as `val`
+        # replace `data/val` with `data/test` for standard test
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='accuracy')  # accuracy metric, interval 1 (unit presumably epochs - confirm)
--- a/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspresnet50_8xb32_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspresnet50_8xb32_in1k.py
+_base_ = [  # NOTE: a dataset base is listed here, yet the pipelines and `data` are redefined below
+    '../_base_/datasets/imagenet_bs32_pil_resize.py',
+    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(type='CSPResNet', depth=50),
+    neck=dict(type='GlobalAveragePooling'),
+    head=dict(
+        type='LinearClsHead',
+        num_classes=1000,
+        in_channels=1024,  # presumably the backbone's output channel count - confirm against CSPResNet
+        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
+        topk=(1, 5),  # top-1 and top-5
+    ))
+
+# dataset settings
+dataset_type = 'ImageNet'
+img_norm_cfg = dict(  # per-channel mean/std given on the 0-255 pixel scale
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # load -> random resized crop (224) -> random horizontal flip -> normalize -> tensors
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='RandomResizedCrop',
+        size=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # load -> resize (288, -1) -> center crop (256) -> normalize -> tensor
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='Resize',
+        size=(288, -1),  # -1 presumably keeps the aspect ratio - confirm against the Resize transform
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='CenterCrop', crop_size=256),  # NOTE: test crop (256) is larger than the training crop (224)
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+data = dict(
+    samples_per_gpu=32,  # per-GPU batch size; matches the "b32" in the file name
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/train',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline),
+    test=dict(  # currently uses the same paths as `val`
+        # replace `data/val` with `data/test` for standard test
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='accuracy')  # accuracy metric, interval 1 (unit presumably epochs - confirm)
--- a/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspresnext50_8xb32_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/cspnet/cspresnext50_8xb32_in1k.py
+_base_ = [  # only schedule and runtime bases are listed; model and dataset are defined in this file
+    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(type='CSPResNeXt', depth=50),
+    neck=dict(type='GlobalAveragePooling'),
+    head=dict(
+        type='LinearClsHead',
+        num_classes=1000,
+        in_channels=2048,  # presumably the backbone's output channel count - confirm against CSPResNeXt
+        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
+        topk=(1, 5),  # top-1 and top-5
+    ))
+
+# dataset settings
+dataset_type = 'ImageNet'
+img_norm_cfg = dict(  # per-channel mean/std given on the 0-255 pixel scale
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # load -> random resized crop (224) -> random horizontal flip -> normalize -> tensors
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='RandomResizedCrop',
+        size=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # load -> resize (256, -1) -> center crop (224) -> normalize -> tensor
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='Resize',
+        size=(256, -1),  # -1 presumably keeps the aspect ratio - confirm against the Resize transform
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type='CenterCrop', crop_size=224),  # same crop size as training
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+data = dict(
+    samples_per_gpu=32,  # per-GPU batch size; matches the "b32" in the file name
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/train',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline),
+    test=dict(  # currently uses the same paths as `val`
+        # replace `data/val` with `data/test` for standard test
+        type=dataset_type,
+        data_prefix='data/imagenet/val',
+        ann_file='data/imagenet/meta/val.txt',
+        pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='accuracy')  # accuracy metric, interval 1 (unit presumably epochs - confirm)
--- a/openmmlab_test/mmclassification-0.24.1/configs/cspnet/metafile.yml
+++ b/openmmlab_test/mmclassification-0.24.1/configs/cspnet/metafile.yml
+Collections:
+  - Name: CSPNet
+    Metadata:
+      Training Data: ImageNet-1k
+      Architecture:
+        - Cross Stage Partial Stage
+    Paper:
+      URL: https://arxiv.org/abs/1911.11929
+      Title: 'CSPNet: A New Backbone that can Enhance Learning Capability of CNN'
+    README: configs/cspnet/README.md
+    Code:
+      Version: v0.22.0
+      URL: https://github.com/open-mmlab/mmclassification/blob/v0.22.0/mmcls/models/backbones/cspnet.py
+
+Models:
+  - Name: cspdarknet50_3rdparty_8xb32_in1k
+    Metadata:
+      FLOPs: 5040000000
+      Parameters: 27640000
+    In Collections: CSPNet
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 80.05
+          Top 5 Accuracy: 95.07
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth
+    Config: configs/cspnet/cspdarknet50_8xb32_in1k.py
+    Converted From:
+      Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth
+      Code: https://github.com/rwightman/pytorch-image-models
+  - Name: cspresnet50_3rdparty_8xb32_in1k
+    Metadata:
+      Training Data: ImageNet-1k
+      FLOPs: 3480000000
+      Parameters: 21620000
+    In Collections: CSPNet
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 79.55
+          Top 5 Accuracy: 94.68
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth
+    Config: configs/cspnet/cspresnet50_8xb32_in1k.py
+    Converted From:
+      Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnet50_ra-d3e8d487.pth
+      Code: https://github.com/rwightman/pytorch-image-models
+  - Name: cspresnext50_3rdparty_8xb32_in1k
+    Metadata:
+      FLOPs: 3110000000
+      Parameters: 20570000
+    In Collections: CSPNet
+    Results:
+      - Dataset: ImageNet-1k
+        Metrics:
+          Top 1 Accuracy: 79.96
+          Top 5 Accuracy: 94.96
+        Task: Image Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth
+    Config: configs/cspnet/cspresnext50_8xb32_in1k.py
+    Converted From:
+      Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspresnext50_ra_224-648b4713.pth
+      Code: https://github.com/rwightman/pytorch-image-models
--- a/openmmlab_test/mmclassification-0.24.1/configs/csra/README.md
+++ b/openmmlab_test/mmclassification-0.24.1/configs/csra/README.md
+# CSRA
+
+> [Residual Attention: A Simple but Effective Method for Multi-Label Recognition](https://arxiv.org/abs/2108.02456)
+
+<!-- [ALGORITHM] -->
+
+## Abstract
+
+Multi-label image recognition is a challenging computer vision task of practical use. Progresses in this area, however, are often characterized by complicated methods, heavy computations, and lack of intuitive explanations. To effectively capture different spatial regions occupied by objects from different categories, we propose an embarrassingly simple module, named class-specific residual attention (CSRA). CSRA generates class-specific features for every category by proposing a simple spatial attention score, and then combines it with the class-agnostic average pooling feature. CSRA achieves state-of-the-art results on multilabel recognition, and at the same time is much simpler than them. Furthermore, with only 4 lines of code, CSRA also leads to consistent improvement across many diverse pretrained models and datasets without any extra training. CSRA is both easy to implement and light in computations, which also enjoys intuitive explanations and visualizations.
+
+<div align=center>
+<img src="https://user-images.githubusercontent.com/84259897/176982245-3ffcff56-a4ea-4474-9967-bc2b612bbaa3.png" width="80%"/>
+</div>
+
+## Results and models
+
+### VOC2007
+
+|     Model      |                      Pretrain                      | Params(M) | Flops(G) |  mAP  | OF1 (%) | CF1 (%) |                      Config                       |                      Download                       |
+| :------------: | :------------------------------------------------: | :-------: | :------: | :---: | :-----: | :-----: | :-----------------------------------------------: | :-------------------------------------------------: |
+| Resnet101-CSRA | [ImageNet-1k](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth) |   23.55   |   4.12   | 94.98 |  90.80  |  89.16  | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/csra/resnet101-csra_1xb16_voc07-448px.py) | [model](https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.log.json) |
+
+## Citation
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2108.02456,
+  doi = {10.48550/ARXIV.2108.02456},
+  url = {https://arxiv.org/abs/2108.02456},
+  author = {Zhu, Ke and Wu, Jianxin},
+  keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
+  title = {Residual Attention: A Simple but Effective Method for Multi-Label Recognition},
+  publisher = {arXiv},
+  year = {2021},
+  copyright = {arXiv.org perpetual, non-exclusive license}
+}
+```
--- a/openmmlab_test/mmclassification-0.24.1/configs/csra/metafile.yml
+++ b/openmmlab_test/mmclassification-0.24.1/configs/csra/metafile.yml
+Collections:
+  - Name: CSRA
+    Metadata:
+      Training Data: PASCAL VOC 2007
+      Architecture:
+        - Class-specific Residual Attention
+    Paper:
+      URL: https://arxiv.org/abs/2108.02456
+      Title: 'Residual Attention: A Simple but Effective Method for Multi-Label Recognition'
+    README: configs/csra/README.md
+    Code:
+      Version: v0.24.0
+      URL: https://github.com/open-mmlab/mmclassification/blob/v0.24.0/mmcls/models/heads/multi_label_csra_head.py
+
+Models:
+  - Name: resnet101-csra_1xb16_voc07-448px
+    Metadata:
+      FLOPs: 4120000000
+      Parameters: 23550000
+    In Collections: CSRA
+    Results:
+      - Dataset: PASCAL VOC 2007
+        Metrics:
+          mAP: 94.98
+          OF1: 90.80
+          CF1: 89.16
+        Task: Multi-Label Classification
+    Weights: https://download.openmmlab.com/mmclassification/v0/csra/resnet101-csra_1xb16_voc07-448px_20220722-29efb40a.pth
+    Config: configs/csra/resnet101-csra_1xb16_voc07-448px.py
--- a/openmmlab_test/mmclassification-0.24.1/configs/csra/resnet101-csra_1xb16_voc07-448px.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/csra/resnet101-csra_1xb16_voc07-448px.py
+_base_ = ['../_base_/datasets/voc_bs16.py', '../_base_/default_runtime.py']  # dataset and runtime bases; model, optimizer and schedule are defined below
+
+# Pre-trained Checkpoint Path
+checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth'  # noqa
+# If you want to use the pre-trained weight of ResNet101-CutMix from
+# the original repo (https://github.com/Kevinz-code/CSRA), the script
+# 'tools/convert_models/torchvision_to_mmcls.py' can help you convert the
+# weight into mmcls format. The mAP result would hit 95.5 by using the weight.
+# checkpoint = 'PATH/TO/PRE-TRAINED_WEIGHT'
+
+# model settings
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(
+        type='ResNet',
+        depth=101,
+        num_stages=4,
+        out_indices=(3, ),  # only output index 3 is used (presumably the last of the 4 stages - confirm)
+        style='pytorch',
+        init_cfg=dict(  # initialise the backbone from the `checkpoint` defined above
+            type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
+    neck=None,  # no neck is configured between backbone and head
+    head=dict(
+        type='CSRAClsHead',
+        num_classes=20,
+        in_channels=2048,  # presumably the ResNet-101 output channel count - confirm
+        num_heads=1,
+        lam=0.1,
+        loss=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
+
+# dataset setting
+img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)  # mean 0 / std 255: presumably rescales pixels to [0, 1] - confirm against Normalize
+train_pipeline = [  # load -> random resized crop (448, scale 0.7-1.0) -> random horizontal flip -> normalize -> tensors
+    dict(type='LoadImageFromFile'),
+    dict(type='RandomResizedCrop', size=448, scale=(0.7, 1.0)),
+    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='ToTensor', keys=['gt_label']),
+    dict(type='Collect', keys=['img', 'gt_label'])
+]
+test_pipeline = [  # load -> resize (448) -> normalize -> tensor; no cropping step
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', size=448),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='ImageToTensor', keys=['img']),
+    dict(type='Collect', keys=['img'])
+]
+data = dict(
+    # map the difficult examples as negative ones(0)
+    train=dict(pipeline=train_pipeline, difficult_as_postive=False),  # NOTE(review): "postive" spelling presumably matches the dataset's keyword - confirm before renaming
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+
+# optimizer
+# the lr of classifier.head is 10 * base_lr, which helps convergence.
+optimizer = dict(
+    type='SGD',
+    lr=0.0002,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10)}))  # lr multiplier of 10 for parameters matched by the 'head' key
+
+optimizer_config = dict(grad_clip=None)  # gradient clipping disabled
+
+# learning policy
+lr_config = dict(
+    policy='step',
+    step=6,  # lr is scaled by `gamma` at step 6 (presumably epoch 6 - confirm)
+    gamma=0.1,
+    warmup='linear',
+    warmup_iters=1,  # counted in epochs because warmup_by_epoch=True
+    warmup_ratio=1e-7,
+    warmup_by_epoch=True)
+runner = dict(type='EpochBasedRunner', max_epochs=20)  # epoch-based training for 20 epochs
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/README.md
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/README.md
+# DeiT
+
+> [Training data-efficient image transformers & distillation through attention](https://arxiv.org/abs/2012.12877)
+
+<!-- [ALGORITHM] -->
+
+## Abstract
+
+Recently, neural networks purely based on attention were shown to address image understanding tasks such as image classification. However, these visual transformers are pre-trained with hundreds of millions of images using an expensive infrastructure, thereby limiting their adoption.   In this work, we produce a competitive convolution-free transformer by training on Imagenet only. We train them on a single computer in less than 3 days. Our reference vision transformer (86M parameters) achieves top-1 accuracy of 83.1% (single-crop evaluation) on ImageNet with no external data.   More importantly, we introduce a teacher-student strategy specific to transformers. It relies on a distillation token ensuring that the student learns from the teacher through attention. We show the interest of this token-based distillation, especially when using a convnet as a teacher. This leads us to report results competitive with convnets for both Imagenet (where we obtain up to 85.2% accuracy) and when transferring to other tasks. We share our code and models.
+
+<div align=center>
+<img src="https://user-images.githubusercontent.com/26739999/143225703-c287c29e-82c9-4c85-a366-dfae30d198cd.png" width="40%"/>
+</div>
+
+## Results and models
+
+### ImageNet-1k
+
+The teacher model of the distilled version of DeiT is RegNetY-16GF.
+
+|            Model            |   Pretrain   | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) |                             Config                             |                             Download                             |
+| :-------------------------: | :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------: | :--------------------------------------------------------------: |
+|          DeiT-tiny          | From scratch |   5.72    |   1.08   |   74.50   |   92.24   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-tiny_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth)  \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.log.json) |
+|    DeiT-tiny distilled\*    | From scratch |   5.72    |   1.08   |   74.51   |   91.90   | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny-distilled_3rdparty_pt-4xb256_in1k_20211216-c429839a.pth) |
+|         DeiT-small          | From scratch |   22.05   |   4.24   |   80.69   |   95.06   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-small_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.pth)  \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.log.json) |
+|   DeiT-small distilled\*    | From scratch |   22.05   |   4.24   |   81.17   |   95.40   | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-small-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small-distilled_3rdparty_pt-4xb256_in1k_20211216-4de1d725.pth) |
+|          DeiT-base          | From scratch |   86.57   |  16.86   |   81.76   |   95.81   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.pth)  \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.log.json) |
+|         DeiT-base\*         | From scratch |   86.57   |  16.86   |   81.79   |   95.59   | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_pt-16xb64_in1k_20211124-6f40c188.pth) |
+|    DeiT-base distilled\*    | From scratch |   86.57   |  16.86   |   83.33   |   96.49   | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base-distilled_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_pt-16xb64_in1k_20211216-42891296.pth) |
+|      DeiT-base 384px\*      | ImageNet-1k  |   86.86   |  49.37   |   83.04   |   96.31   | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_ft-16xb32_in1k-384px_20211124-822d02f2.pth) |
+| DeiT-base distilled 384px\* | ImageNet-1k  |   86.86   |  49.37   |   85.55   |   97.35   | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_ft-16xb32_in1k-384px_20211216-e48d6000.pth) |
+
+*Models with \* are converted from the [official repo](https://github.com/facebookresearch/deit). The config files of these models are for validation only. We do not guarantee the training accuracy of these config files, and you are welcome to contribute your reproduction results.*
+
+```{warning}
+MMClassification doesn't support training the distilled version of DeiT.
+The distilled checkpoints are provided for inference only.
+```
+
+## Citation
+
+```
+@InProceedings{pmlr-v139-touvron21a,
+  title =     {Training data-efficient image transformers \& distillation through attention},
+  author =    {Touvron, Hugo and Cord, Matthieu and Douze, Matthijs and Massa, Francisco and Sablayrolles, Alexandre and Jegou, Herve},
+  booktitle = {International Conference on Machine Learning},
+  pages =     {10347--10357},
+  year =      {2021},
+  volume =    {139},
+  month =     {July}
+}
+```
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py
+_base_ = './deit-base_ft-16xb32_in1k-384px.py'
+
+# model settings: only the backbone and head types are overridden here
+model = dict(
+    backbone=dict(type='DistilledVisionTransformer'),
+    head=dict(type='DeiTClsHead'),
+    # Uncomment and set `checkpoint` to the path of the pretrained model
+    # init_cfg=dict(type='Pretrained', checkpoint=''),
+)
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base-distilled_pt-16xb64_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base-distilled_pt-16xb64_in1k.py
+_base_ = './deit-small_pt-4xb256_in1k.py'
+
+# model settings: distilled backbone/head with the deit-base arch
+model = dict(
+    backbone=dict(type='DistilledVisionTransformer', arch='deit-base'),
+    head=dict(type='DeiTClsHead', in_channels=768),
+)
+
+# data settings: 64 samples per GPU (the deit-small base config uses 256)
+data = dict(samples_per_gpu=64, workers_per_gpu=5)
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base_ft-16xb32_in1k-384px.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base_ft-16xb32_in1k-384px.py
+_base_ = [
+    '../_base_/datasets/imagenet_bs64_swin_384.py',
+    '../_base_/schedules/imagenet_bs4096_AdamW.py',
+    '../_base_/default_runtime.py'
+]
+
+# model settings: deit-base VisionTransformer with img_size=384, no neck
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(
+        type='VisionTransformer',
+        arch='deit-base',
+        img_size=384,
+        patch_size=16,
+    ),
+    neck=None,
+    head=dict(
+        type='VisionTransformerClsHead',
+        num_classes=1000,
+        in_channels=768,
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    # Uncomment and set `checkpoint` to the path of the pretrained model
+    # init_cfg=dict(type='Pretrained', checkpoint=''),
+)
+
+# data settings: per-GPU batch size and number of dataloader workers
+data = dict(samples_per_gpu=32, workers_per_gpu=5)
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base_pt-16xb64_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-base_pt-16xb64_in1k.py
+_base_ = './deit-small_pt-4xb256_in1k.py'
+
+# model settings: deit-base arch; unlike the small base config, sets drop path
+model = dict(
+    backbone=dict(
+        type='VisionTransformer', arch='deit-base', drop_path_rate=0.1),
+    head=dict(type='VisionTransformerClsHead', in_channels=768),
+)
+
+# data settings: 64 samples per GPU (the deit-small base config uses 256)
+data = dict(samples_per_gpu=64, workers_per_gpu=5)
+
+custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-small-distilled_pt-4xb256_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-small-distilled_pt-4xb256_in1k.py
+_base_ = './deit-small_pt-4xb256_in1k.py'
+
+# model settings: same deit-small arch, with the distilled backbone and head
+model = dict(
+    backbone=dict(type='DistilledVisionTransformer', arch='deit-small'),
+    head=dict(type='DeiTClsHead', in_channels=384),
+)
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-small_pt-4xb256_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-small_pt-4xb256_in1k.py
+# For the small and tiny archs, drop path and the EMA hook are removed
+# compared with the original config
+_base_ = [
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py'
+]
+
+# model settings: deit-small VisionTransformer with img_size=224, no neck
+model = dict(
+    type='ImageClassifier',
+    backbone=dict(
+        type='VisionTransformer',
+        arch='deit-small',
+        img_size=224,
+        patch_size=16),
+    neck=None,
+    head=dict(
+        type='VisionTransformerClsHead',
+        num_classes=1000,
+        in_channels=384,
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    init_cfg=[
+        dict(type='TruncNormal', layer='Linear', std=.02),
+        dict(type='Constant', layer='LayerNorm', val=1., bias=0.),
+    ],
+    train_cfg=dict(augments=[
+        dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
+        dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
+    ]))
+
+# data settings: per-GPU batch size and number of dataloader workers
+data = dict(samples_per_gpu=256, workers_per_gpu=5)
+
+paramwise_cfg = dict(
+    norm_decay_mult=0.0,  # weight-decay multiplier 0 for norm layers
+    bias_decay_mult=0.0,  # weight-decay multiplier 0 for bias params
+    custom_keys={
+        '.cls_token': dict(decay_mult=0.0),  # no decay for the class token
+        '.pos_embed': dict(decay_mult=0.0)  # no decay for position embeddings
+    })
+optimizer = dict(paramwise_cfg=paramwise_cfg)  # other keys come from _base_
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py
+_base_ = './deit-small_pt-4xb256_in1k.py'
+
+# model settings: distilled backbone/head with the deit-tiny arch
+model = dict(
+    backbone=dict(type='DistilledVisionTransformer', arch='deit-tiny'),
+    head=dict(type='DeiTClsHead', in_channels=192),
+)
--- a/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-tiny_pt-4xb256_in1k.py
+++ b/openmmlab_test/mmclassification-0.24.1/configs/deit/deit-tiny_pt-4xb256_in1k.py
+_base_ = './deit-small_pt-4xb256_in1k.py'
+
+# model settings: switch the deit-small base config to the deit-tiny arch
+model = dict(
+    backbone=dict(type='VisionTransformer', arch='deit-tiny'),
+    head=dict(type='VisionTransformerClsHead', in_channels=192),
+)