# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.25,
        by_epoch=True,
        begin=0,
        # about 2500 iterations for ImageNet-1k
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=95,
        by_epoch=True,
        begin=5,
        end=100,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
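# A rough sketch of what the scaling rule does (an assumption about the
# runner's behaviour when auto LR scaling is enabled, not part of this
# config): the optimizer LR is rescaled linearly by the ratio of the actual
# total batch size to `base_batch_size`, e.g.
#   scaled_lr = lr * actual_batch_size / base_batch_size
#   # 0.8 * 1024 / 2048 = 0.4 when training with a total batch size of 1024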
# optimizer
optim_wrapper = dict(optimizer=dict(type='Lamb', lr=0.005, weight_decay=0.02))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=95,
        eta_min=1.0e-6,
        by_epoch=True,
        begin=5,
        end=100)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
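# Illustrative helper (a hypothetical function, not used by the runner),
# showing the LR that the MultiStepLR policy above yields at a given epoch:
#   def lr_at(epoch, base_lr=0.1, milestones=(30, 60, 90), gamma=0.1):
#       return base_lr * gamma ** sum(epoch >= m for m in milestones)
#   lr_at(45)  # -> 0.01 (after the first drop at epoch 30)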
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[40, 80, 120], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=140, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.25,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True,
    ),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=195,
        by_epoch=True,
        begin=5,
        end=200,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=200, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=295,
        eta_min=1.0e-6,
        by_epoch=True,
        begin=5,
        end=300),
    dict(
        type='CosineAnnealingParamScheduler',
        param_name='weight_decay',
        eta_min=0.00001,
        by_epoch=True,
        begin=0,
        end=300)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=0.00004))
# learning policy
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=1, gamma=0.98)
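# Illustrative (not part of the config): with step_size=1 the LR decays by 2%
# every epoch, i.e. lr(epoch) = 0.045 * 0.98 ** epoch.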
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='AdamW', lr=0.003, weight_decay=0.3),
    # specific to vit pretrain
    paramwise_cfg=dict(custom_keys={
        '.cls_token': dict(decay_mult=0.0),
        '.pos_embed': dict(decay_mult=0.0)
    }),
)
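# `decay_mult=0.0` removes weight decay from the class token and position
# embedding parameters; all other parameters keep weight_decay=0.3.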
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-4,
        by_epoch=True,
        begin=0,
        end=30,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=270,
        by_epoch=True,
        begin=30,
        end=300,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9))
# learning rate scheduler
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1e-4,
        by_epoch=True,
        begin=0,
        end=10,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=200)
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=90)
val_cfg = dict()
test_cfg = dict()
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=100)
val_cfg = dict()
test_cfg = dict()
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=200)
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4))
# learning rate scheduler
param_scheduler = [
    dict(type='MultiStepLR', by_epoch=True, milestones=[60, 80], gamma=0.1)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=100)
val_cfg = dict()
test_cfg = dict()
# ArcFace
> [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
<!-- [ALGORITHM] -->
## Abstract
Recently, a popular line of research in face recognition is adopting margins in the well-established softmax loss function to maximize class separability. In this paper, we first introduce an Additive Angular Margin Loss (ArcFace), which not only has a clear geometric interpretation but also significantly enhances the discriminative power. Since ArcFace is susceptible to the massive label noise, we further propose sub-center ArcFace, in which each class contains K sub-centers and training samples only need to be close to any of the K positive sub-centers. Sub-center ArcFace encourages one dominant sub-class that contains the majority of clean faces and non-dominant sub-classes that include hard or noisy faces. Based on this self-propelled isolation, we boost the performance through automatically purifying raw web faces under massive real-world noise. Besides discriminative feature embedding, we also explore the inverse problem, mapping feature vectors to face images. Without training any additional generator or discriminator, the pre-trained ArcFace model can generate identity-preserved face images for both subjects inside and outside the training data only by using the network gradient and Batch Normalization (BN) priors. Extensive experiments demonstrate that ArcFace can enhance the discriminative feature embedding as well as strengthen the generative face synthesis.
<div align=center>
<img src="https://user-images.githubusercontent.com/24734142/212606212-8ffc3cd2-dbc1-4abf-8924-22167f3f6e34.png" width="80%"/>
</div>
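At its core, the loss normalizes both features and class centers, adds an angular margin `m` to the target-class angle, and rescales by `s` before the softmax. Below is a minimal PyTorch sketch of that computation (a simplified illustration, not the `ArcFaceClsHead` implementation; `s=64` and `m=0.5` follow the paper's defaults, and the tensor shapes are hypothetical):

```python
import torch
import torch.nn.functional as F

def arcface_logits(feats, weight, labels, s=64.0, m=0.5):
    """Add an angular margin m to the target-class logit (simplified sketch)."""
    # cosine similarity between L2-normalized features and class centers
    cos = F.normalize(feats) @ F.normalize(weight).t()  # (N, C)
    theta = torch.acos(cos.clamp(-1 + 1e-7, 1 - 1e-7))
    target = F.one_hot(labels, num_classes=weight.size(0)).bool()
    # cos(theta + m) for the ground-truth class, plain cos(theta) elsewhere
    logits = torch.where(target, torch.cos(theta + m), cos)
    return s * logits

# cross-entropy on the margin-adjusted, rescaled logits
feats = torch.randn(4, 2048)
weight = torch.randn(3997, 2048)  # one center per InShop class
labels = torch.randint(0, 3997, (4,))
loss = F.cross_entropy(arcface_logits(feats, weight, labels), labels)
```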
## How to use it?
<!-- [TABS-BEGIN] -->
**Retrieve image**
```python
from mmpretrain import ImageRetrievalInferencer

inferencer = ImageRetrievalInferencer('resnet50-arcface_inshop', prototype='demo/')
predict = inferencer('demo/dog.jpg', topk=2)[0]
print(predict[0])
print(predict[1])
```
**Use the model**
```python
import torch
from mmpretrain import get_model

model = get_model('resnet50-arcface_inshop', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))

# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/arcface/resnet50-arcface_8xb32_inshop.py
```
Test:
```shell
python tools/test.py configs/arcface/resnet50-arcface_8xb32_inshop.py https://download.openmmlab.com/mmclassification/v0/arcface/resnet50-arcface_inshop_20230202-b766fe7f.pth
```
<!-- [TABS-END] -->
## Models and results
### Image Retrieval on InShop
| Model | Pretrain | Params(M) | Flops(G) | Recall@1 | mAP@10 | Config | Download |
| :-----------------------: | :------------------------------------------------: | :-------: | :------: | :------: | :----: | :------------------------------------------: | :------------------------------------------------: |
| `resnet50-arcface_inshop` | [ImageNet-21k-mill](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_3rdparty-mill_in21k_20220331-faac000b.pth) | 31.69 | 16.48 | 90.18 | 69.30 | [config](./resnet50-arcface_8xb32_inshop.py) | [model](https://download.openmmlab.com/mmclassification/v0/arcface/resnet50-arcface_inshop_20230202-b766fe7f.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/arcface/resnet50-arcface_inshop_20230202-b766fe7f.log) |
## Citation
```bibtex
@inproceedings{deng2018arcface,
  title={ArcFace: Additive Angular Margin Loss for Deep Face Recognition},
  author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
  booktitle={CVPR},
  year={2019}
}
```
Collections:
  - Name: ArcFace
    Metadata:
      Training Data: InShop
      Architecture:
        - Additive Angular Margin Loss
    Paper:
      URL: https://arxiv.org/abs/1801.07698
      Title: 'ArcFace: Additive Angular Margin Loss for Deep Face Recognition'
    README: configs/arcface/README.md
    Code:
      Version: v1.0.0rc3
      URL: https://github.com/open-mmlab/mmpretrain/blob/v1.0.0rc3/mmcls/models/heads/margin_head.py

Models:
  - Name: resnet50-arcface_inshop
    Metadata:
      FLOPs: 16571226112
      Parameters: 31693888
    In Collection: ArcFace
    Results:
      - Dataset: InShop
        Metrics:
          Recall@1: 90.18
          mAP@10: 69.30
        Task: Image Retrieval
    Weights: https://download.openmmlab.com/mmclassification/v0/arcface/resnet50-arcface_inshop_20230202-b766fe7f.pth
    Config: configs/arcface/resnet50-arcface_8xb32_inshop.py
_base_ = [
    '../_base_/datasets/inshop_bs32_448.py',
    '../_base_/schedules/cub_bs64.py',
    '../_base_/default_runtime.py',
]
pretrained = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_3rdparty-mill_in21k_20220331-faac000b.pth' # noqa
model = dict(
    type='ImageToImageRetriever',
    image_encoder=[
        dict(
            type='ResNet',
            depth=50,
            init_cfg=dict(
                type='Pretrained', checkpoint=pretrained, prefix='backbone')),
        dict(type='GlobalAveragePooling'),
    ],
    head=dict(
        type='ArcFaceClsHead',
        num_classes=3997,
        in_channels=2048,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        init_cfg=None),
    prototype={{_base_.gallery_dataloader}})
# runtime settings
default_hooks = dict(
    # log every 20 iterations
    logger=dict(type='LoggerHook', interval=20),
    # save the last three checkpoints
    checkpoint=dict(
        type='CheckpointHook',
        save_best='auto',
        interval=1,
        max_keep_ckpts=3,
        rule='greater'))
# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005, nesterov=True))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.01,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=45,
        by_epoch=True,
        begin=5,
        end=50,
    )
]
train_cfg = dict(by_epoch=True, max_epochs=50, val_interval=1)
auto_scale_lr = dict(enable=True, base_batch_size=256)
custom_hooks = [
    dict(type='PrepareProtoBeforeValLoopHook'),
    dict(type='SyncBuffersHook')
]
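# How the pieces above fit together (an assumption about the internals, for
# orientation only): `PrepareProtoBeforeValLoopHook` runs the gallery
# dataloader (the `prototype`) through the image encoder before each
# validation loop and caches one feature vector per gallery image; a query
# image is then ranked against this cached prototype by feature similarity,
# conceptually:
#   scores = query_feat @ prototype_feats.t()  # (1, num_gallery)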
# BarlowTwins
> [Barlow Twins: Self-Supervised Learning via Redundancy Reduction](https://arxiv.org/abs/2103.03230)
<!-- [ALGORITHM] -->
## Abstract
Self-supervised learning (SSL) is rapidly closing the gap with supervised methods on large computer vision benchmarks. A successful approach to SSL is to learn embeddings which are invariant to distortions of the input sample. However, a recurring issue with this approach is the existence of trivial constant solutions. Most current methods avoid such solutions by careful implementation details. We propose an objective function that naturally avoids collapse by measuring the cross-correlation matrix between the outputs of two identical networks fed with distorted versions of a sample, and making it as close to the identity matrix as possible. This causes the embedding vectors of distorted versions of a sample to be similar, while minimizing the redundancy between the components of these vectors. The method is called Barlow Twins, owing to neuroscientist H. Barlow's redundancy-reduction principle applied to a pair of identical networks. Barlow Twins does not require large batches nor asymmetry between the network twins such as a predictor network, gradient stopping, or a moving average on the weight updates. Intriguingly it benefits from very high-dimensional output vectors. Barlow Twins outperforms previous methods on ImageNet for semi-supervised classification in the low-data regime, and is on par with current state of the art for ImageNet classification with a linear classifier head, and for transfer tasks of classification and object detection.
<div align=center>
<img src="https://user-images.githubusercontent.com/36138628/163914714-082de804-0b5f-4024-94f9-880e6ef334fa.png" width="800" />
</div>
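The objective is compact enough to sketch directly. Below is a minimal PyTorch version of the cross-correlation loss (an illustration under the paper's formulation, not the `CrossCorrelationLoss` used in the configs; `lambd=5e-3` is the paper's trade-off weight):

```python
import torch

def barlow_twins_loss(z1, z2, lambd=5e-3):
    """Cross-correlation objective of Barlow Twins (simplified sketch)."""
    n, d = z1.shape
    # standardize each embedding dimension across the batch
    z1 = (z1 - z1.mean(0)) / (z1.std(0) + 1e-9)
    z2 = (z2 - z2.mean(0)) / (z2.std(0) + 1e-9)
    c = z1.T @ z2 / n  # (d, d) cross-correlation matrix
    on_diag = (torch.diagonal(c) - 1).pow(2).sum()  # drive the diagonal to 1
    off_diag = (c - torch.diag(torch.diagonal(c))).pow(2).sum()  # decorrelate
    return on_diag + lambd * off_diag

# two distorted views of the same batch, after backbone + projector
z1, z2 = torch.randn(256, 8192), torch.randn(256, 8192)
print(barlow_twins_loss(z1, z2))
```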
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
from mmpretrain import inference_model

predict = inference_model('resnet50_barlowtwins-pre_8xb32-linear-coslr-100e_in1k', 'demo/bird.JPEG')
print(predict['pred_class'])
print(predict['pred_score'])
```
**Use the model**
```python
import torch
from mmpretrain import get_model

model = get_model('barlowtwins_resnet50_8xb256-coslr-300e_in1k', pretrained=True)
inputs = torch.rand(1, 3, 224, 224)
out = model(inputs)
print(type(out))

# To extract features.
feats = model.extract_feat(inputs)
print(type(feats))
```
**Train/Test Command**
Prepare your dataset according to the [docs](https://mmpretrain.readthedocs.io/en/latest/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k.py
```
Test:
```shell
python tools/test.py configs/barlowtwins/benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/resnet50_linear-8xb32-coslr-100e_in1k/resnet50_linear-8xb32-coslr-100e_in1k_20220825-52fde35f.pth
```
<!-- [TABS-END] -->
## Models and results
### Pretrained models
| Model | Params (M) | Flops (G) | Config | Download |
| :-------------------------------------------- | :--------: | :-------: | :------------------------------------------------------: | :------------------------------------------------------------------------------: |
| `barlowtwins_resnet50_8xb256-coslr-300e_in1k` | 174.54 | 4.11 | [config](barlowtwins_resnet50_8xb256-coslr-300e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/barlowtwins_resnet50_8xb256-coslr-300e_in1k_20220825-57307488.pth) \| [log](https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/barlowtwins_resnet50_8xb256-coslr-300e_in1k_20220825-57307488.json) |
### Image Classification on ImageNet-1k
| Model | Pretrain | Params (M) | Flops (G) | Top-1 (%) | Config | Download |
| :---------------------------------------- | :------------------------------------------: | :--------: | :-------: | :-------: | :----------------------------------------: | :-------------------------------------------: |
| `resnet50_barlowtwins-pre_8xb32-linear-coslr-100e_in1k` | [BARLOWTWINS](https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/barlowtwins_resnet50_8xb256-coslr-300e_in1k_20220825-57307488.pth) | 25.56 | 4.11 | 71.80 | [config](benchmarks/resnet50_8xb32-linear-coslr-100e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/resnet50_linear-8xb32-coslr-100e_in1k/resnet50_linear-8xb32-coslr-100e_in1k_20220825-52fde35f.pth) \| [log](https://download.openmmlab.com/mmselfsup/1.x/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k/resnet50_linear-8xb32-coslr-100e_in1k/resnet50_linear-8xb32-coslr-100e_in1k_20220825-52fde35f.json) |
## Citation
```bibtex
@inproceedings{zbontar2021barlow,
  title={Barlow twins: Self-supervised learning via redundancy reduction},
  author={Zbontar, Jure and Jing, Li and Misra, Ishan and LeCun, Yann and Deny, St{\'e}phane},
  booktitle={International Conference on Machine Learning},
  year={2021},
}
```
_base_ = [
    '../_base_/datasets/imagenet_bs32_byol.py',
    '../_base_/default_runtime.py',
]
# datasets
train_dataloader = dict(batch_size=256)
# model settings
model = dict(
    type='BarlowTwins',
    backbone=dict(
        type='ResNet',
        depth=50,
        norm_cfg=dict(type='SyncBN'),
        zero_init_residual=True),
    neck=dict(
        type='NonLinearNeck',
        in_channels=2048,
        hid_channels=8192,
        out_channels=8192,
        num_layers=3,
        with_last_bn=False,
        with_last_bn_affine=False,
        with_avg_pool=True,
        init_cfg=dict(
            type='Kaiming', distribution='uniform', layer=['Linear'])),
    head=dict(
        type='LatentCrossCorrelationHead',
        in_channels=8192,
        loss=dict(type='CrossCorrelationLoss')))
# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=1e-6),
    paramwise_cfg=dict(
        custom_keys={
            'bn': dict(decay_mult=0, lr_mult=0.024, lars_exclude=True),
            'bias': dict(decay_mult=0, lr_mult=0.024, lars_exclude=True),
            # bn layer in ResNet block downsample module
            'downsample.1': dict(
                decay_mult=0, lr_mult=0.024, lars_exclude=True),
        }))
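# Note (an assumption about this LARS implementation): `lars_exclude=True`
# makes BN and bias parameters skip the layer-wise trust-ratio adaptation and
# update with plain momentum SGD, while `lr_mult=0.024` gives them an
# effective LR of 1.6 * 0.024 = 0.0384, i.e. the paper's bias/BN rate scaled
# to this total batch size.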
# learning rate scheduler
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.6e-4,
        by_epoch=True,
        begin=0,
        end=10,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        T_max=990,
        eta_min=0.0016,
        by_epoch=True,
        begin=10,
        end=1000,
        convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1000)
default_hooks = dict(checkpoint=dict(max_keep_ckpts=3))
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
_base_ = [
    '../_base_/datasets/imagenet_bs32_byol.py',
    '../_base_/default_runtime.py',
]
# datasets
train_dataloader = dict(batch_size=256)
# model settings
model = dict(
    type='BarlowTwins',
    backbone=dict(
        type='ResNet',
        depth=50,
        norm_cfg=dict(type='SyncBN'),
        zero_init_residual=True),
    neck=dict(
        type='NonLinearNeck',
        in_channels=2048,
        hid_channels=8192,
        out_channels=8192,
        num_layers=3,
        with_last_bn=False,
        with_last_bn_affine=False,
        with_avg_pool=True,
        init_cfg=dict(
            type='Kaiming', distribution='uniform', layer=['Linear'])),
    head=dict(
        type='LatentCrossCorrelationHead',
        in_channels=8192,
        loss=dict(type='CrossCorrelationLoss')))
# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=1e-6),
    paramwise_cfg=dict(
        custom_keys={
            'bn': dict(decay_mult=0, lr_mult=0.024, lars_exclude=True),
            'bias': dict(decay_mult=0, lr_mult=0.024, lars_exclude=True),
            # bn layer in ResNet block downsample module
            'downsample.1': dict(
                decay_mult=0, lr_mult=0.024, lars_exclude=True),
        }))
# learning rate scheduler
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.6e-4,
        by_epoch=True,
        begin=0,
        end=10,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        T_max=290,
        eta_min=0.0016,
        by_epoch=True,
        begin=10,
        end=300,
        convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=300)
default_hooks = dict(checkpoint=dict(max_keep_ckpts=3))
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)