Commit d476eeba authored by renzhc

upload mmpretrain

parent 62b8498e
# lr follows the linear scaling rule with a base of 5e-4 per 512 samples
# lr = 5e-4 * 2048 / 512 = 0.002
# schedule settings
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW',
        lr=5e-4 * 2048 / 512,
        weight_decay=0.05,
        eps=1e-8,
        betas=(0.9, 0.999)),
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        bias_decay_mult=0.0,
        custom_keys={
            '.cls_token': dict(decay_mult=0.0),
            '.pos_embed': dict(decay_mult=0.0)
        }),
    clip_grad=dict(max_norm=1.0),
)
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-8 / 2e-3,
        by_epoch=True,
        end=70,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=70)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)
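# Illustrative sketch: the lr expression above follows the linear scaling
# rule, with a base lr of 5e-4 defined per 512 samples and scaled by the
# actual total batch size. `scaled_lr` below is a hypothetical helper that
# only restates that arithmetic; it is not part of mmpretrain.
def scaled_lr(base_lr=5e-4, base_batch_size=512, total_batch_size=2048):
    """Linear LR scaling: lr grows in proportion to the total batch size."""
    return base_lr * total_batch_size / base_batch_size

assert abs(scaled_lr() - 0.002) < 1e-12  # matches lr=5e-4 * 2048 / 512 above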
# for a batch size of 128 on each of 8 GPUs (total batch size 1024)
# lr = 5e-4 * 128 * 8 / 512 = 0.001
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW',
        lr=5e-4 * 1024 / 512,
        weight_decay=0.05,
        eps=1e-8,
        betas=(0.9, 0.999)),
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        bias_decay_mult=0.0,
        flat_decay_mult=0.0,
        custom_keys={
            '.absolute_pos_embed': dict(decay_mult=0.0),
            '.relative_position_bias_table': dict(decay_mult=0.0)
        }),
)
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-3,
        by_epoch=True,
        end=20,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)
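# Illustrative sketch: `paramwise_cfg` is assumed to be consumed by MMEngine's
# default optimizer-wrapper constructor, which multiplies weight_decay by
# `decay_mult` for parameters matched by `custom_keys` (and similarly for
# norm/bias/flat parameters). The hypothetical helper below models only the
# custom_keys part of that behaviour; the parameter names are made up.
def effective_weight_decay(param_name, base_wd=0.05,
                           zero_decay_keys=('.absolute_pos_embed',
                                            '.relative_position_bias_table')):
    """Return the weight decay applied to a parameter with the given name."""
    if any(key in param_name for key in zero_decay_keys):
        return 0.0
    return base_wd

print(effective_weight_decay('backbone.layers.0.attn.relative_position_bias_table'))  # 0.0
print(effective_weight_decay('backbone.layers.0.attn.qkv.weight'))  # 0.05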
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.8, momentum=0.9, weight_decay=5e-5))
# learning policy
param_scheduler = [
    dict(type='LinearLR', start_factor=0.1, by_epoch=True, begin=0, end=5),
    dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)
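# Illustrative sketch: a hypothetical, self-contained reconstruction of the
# schedule above, assuming the standard cosine-annealing formula. The lr warms
# up linearly from 0.1 * lr to lr over the first 5 epochs, then anneals to
# eta_min (0 by default here) over the remaining 95 epochs.
import math

def lr_at_epoch(epoch, base_lr=0.8, warmup_epochs=5, t_max=95, eta_min=0.0):
    if epoch < warmup_epochs:
        factor = 0.1 + (1.0 - 0.1) * epoch / warmup_epochs
        return base_lr * factor
    t = epoch - warmup_epochs
    return eta_min + (base_lr - eta_min) * (1 + math.cos(math.pi * t / t_max)) / 2

print(lr_at_epoch(0), lr_at_epoch(5), lr_at_epoch(100))  # ~0.08, 0.8, 0.0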
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=0.00004),
    paramwise_cfg=dict(norm_decay_mult=0),
)
# learning policy
param_scheduler = [
    dict(type='ConstantLR', factor=0.1, by_epoch=False, begin=0, end=5000),
    dict(type='PolyLR', eta_min=0, by_epoch=False, begin=5000)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)
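# Illustrative sketch: with `by_epoch=False`, `begin`/`end` above are counted
# in iterations, so the lr is held at 0.1 * 0.5 for the first 5000 iterations
# and then decays polynomially to eta_min. `poly_warmup_lr` is a hypothetical
# helper assuming PolyLR's default power of 1.0 (a linear decay).
def poly_warmup_lr(iteration, total_iters, base_lr=0.5, warmup_iters=5000,
                   factor=0.1, eta_min=0.0, power=1.0):
    if iteration < warmup_iters:
        return base_lr * factor
    progress = (iteration - warmup_iters) / max(total_iters - warmup_iters, 1)
    return (base_lr - eta_min) * (1 - progress) ** power + eta_min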
# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True))
# learning policy
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.25, by_epoch=False, begin=0, end=2500),
    dict(
        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
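# Illustrative sketch: after the 2500-iteration linear warmup, MultiStepLR is
# the classic step-decay schedule, multiplying the lr by gamma=0.1 at each
# milestone epoch (30, 60 and 90). `multistep_lr` is a hypothetical helper
# restating that rule.
def multistep_lr(epoch, base_lr=0.8, milestones=(30, 60, 90), gamma=0.1):
    return base_lr * gamma ** sum(epoch >= m for m in milestones)

print(multistep_lr(0), multistep_lr(30), multistep_lr(60), multistep_lr(90))
# 0.8, 0.08, 0.008, 0.0008 (up to float rounding)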
# optimizer
# In ClassyVision, the lr is set to 0.003 for bs4096.
# In this implementation (bs2048), lr = 0.003 / 4096 * (32 per GPU * 64 GPUs) = 0.0015
optim_wrapper = dict(
    optimizer=dict(type='AdamW', lr=0.0015, weight_decay=0.3),
    # specific to vit pretrain
    paramwise_cfg=dict(custom_keys={
        '.cls_token': dict(decay_mult=0.0),
        '.pos_embed': dict(decay_mult=0.0)
    }),
)
# learning policy
warmup_epochs = 15 # about 10000 iterations for ImageNet-1k
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-3,
        by_epoch=True,
        end=warmup_epochs,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        eta_min=1e-5,
        by_epoch=True,
        begin=warmup_epochs)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
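# Illustrative sketch: checking the arithmetic in the comment above. 0.003 is
# the reference lr for a batch size of 4096, scaled linearly down to this
# run's total batch size of 32 * 64 = 2048.
reference_lr, reference_bs = 0.003, 4096
total_bs = 32 * 64  # samples per optimization step
assert abs(reference_lr / reference_bs * total_bs - 0.0015) < 1e-12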
# for a batch size of 256 on each of 8 GPUs (total batch size 2048)
# lr = 5e-4 * 256 * 8 / 512 = 0.002
optim_wrapper = dict(
    optimizer=dict(
        type='AdamW',
        lr=0.002,
        weight_decay=0.025,
        eps=1e-8,
        betas=(0.9, 0.999)),
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        bias_decay_mult=0.0,
        custom_keys={
            '.attention_biases': dict(decay_mult=0.0),
        }),
)
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-6 / 0.002,
        by_epoch=True,
        end=5,
        # update by iter
        convert_to_iter_based=True,
    ),
    # main learning rate scheduler
    dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=5)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=1000)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
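# Illustrative sketch: `start_factor` multiplies the optimizer lr, so writing
# it as 1e-6 / 0.002 makes the warmup start from an absolute lr of 1e-6.
base_lr = 0.002
start_factor = 1e-6 / 0.002
assert abs(base_lr * start_factor - 1e-6) < 1e-18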
# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.25,
        by_epoch=True,
        begin=0,
        # about 2500 iterations for ImageNet-1k
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=95,
        by_epoch=True,
        begin=5,
        end=100,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
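# Illustrative sketch: `auto_scale_lr` records the total batch size the lr was
# tuned for. Assuming MMEngine's usual behaviour when automatic lr scaling is
# enabled, the configured lr is rescaled by the ratio of the actual total
# batch size to `base_batch_size`; `auto_scaled_lr` is a hypothetical helper
# mirroring that rule, not part of mmpretrain.
def auto_scaled_lr(configured_lr, actual_batch_size, base_batch_size=2048):
    return configured_lr * actual_batch_size / base_batch_size

print(auto_scaled_lr(0.8, actual_batch_size=1024))  # 0.4 when training on half the batch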
# optimizer
optim_wrapper = dict(optimizer=dict(type='Lamb', lr=0.005, weight_decay=0.02))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=95,
        eta_min=1.0e-6,
        by_epoch=True,
        begin=5,
        end=100)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[40, 80, 120], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=140, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.25,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True,
    ),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=195,
        by_epoch=True,
        begin=5,
        end=200,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=200, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=295,
        eta_min=1.0e-6,
        by_epoch=True,
        begin=5,
        end=300),
    dict(
        type='CosineAnnealingParamScheduler',
        param_name='weight_decay',
        eta_min=0.00001,
        by_epoch=True,
        begin=0,
        end=300)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
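# Illustrative sketch: the CosineAnnealingParamScheduler above anneals the
# optimizer's `weight_decay` (not the lr) with a cosine curve, from the
# configured 0.0001 down to 1e-5 over the full 300 epochs. `cosine_wd` is a
# hypothetical helper assuming the standard cosine-annealing formula.
import math

def cosine_wd(epoch, wd_max=1e-4, wd_min=1e-5, total_epochs=300):
    return wd_min + (wd_max - wd_min) * (1 + math.cos(math.pi * epoch / total_epochs)) / 2

print(cosine_wd(0), cosine_wd(150), cosine_wd(300))  # 1e-4, ~5.5e-5, 1e-5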
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=0.00004))
# learning policy
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=1, gamma=0.98)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=256)
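# Illustrative sketch: StepLR with step_size=1 and gamma=0.98 multiplies the
# lr by 0.98 after every epoch, i.e. a smooth exponential decay over the
# 300-epoch run. `steplr` is a hypothetical helper restating that rule.
def steplr(epoch, base_lr=0.045, gamma=0.98, step_size=1):
    return base_lr * gamma ** (epoch // step_size)

print(steplr(0), steplr(150), steplr(299))  # 0.045, ~0.0022, ~1.1e-4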
# optimizer
optim_wrapper = dict(
    optimizer=dict(type='AdamW', lr=0.003, weight_decay=0.3),
    # specific to vit pretrain
    paramwise_cfg=dict(custom_keys={
        '.cls_token': dict(decay_mult=0.0),
        '.pos_embed': dict(decay_mult=0.0)
    }),
)
# learning policy
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=1e-4,
        by_epoch=True,
        begin=0,
        end=30,
        # update by iter
        convert_to_iter_based=True),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=270,
        by_epoch=True,
        begin=30,
        end=300,
    )
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
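# Illustrative sketch: two quick consistency checks on the ViT pre-training
# schedule above. The warmup starts from an absolute lr of 0.003 * 1e-4 = 3e-7,
# and the 30-epoch warmup plus the 270-epoch cosine phase cover exactly the
# 300 training epochs.
base_lr, start_factor = 0.003, 1e-4
assert abs(base_lr * start_factor - 3e-7) < 1e-15
assert 30 + 270 == 300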
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9))
# learning rate scheduler
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1e-4,
        by_epoch=True,
        begin=0,
        end=10,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=200)
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=90)
val_cfg = dict()
test_cfg = dict()
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=100)
val_cfg = dict()
test_cfg = dict()
# optimizer wrapper
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9))
# learning rate scheduler
param_scheduler = [
    dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
]
# runtime settings
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=200)