Commit 0bf5e500 authored by Tri Dao

Release training code

parent 9bc63d1e
# @package train.optimizer
_target_: torch.distributed.optim.ZeroRedundancyOptimizer
_recursive_: True
optimizer_class:
  _target_: apex.optimizers.FusedAdam
  _partial_: True
  adam_w_mode: True
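
With `_recursive_: True` and `_partial_: True`, Hydra resolves the nested `optimizer_class` node into a `functools.partial` of `FusedAdam` before handing it to `ZeroRedundancyOptimizer`. A minimal Python sketch of what this config amounts to (assuming apex is installed and a torch.distributed process group is already initialized; the model and learning rate are placeholders supplied by the training code, not by this file):

from functools import partial

import torch
from apex.optimizers import FusedAdam
from torch.distributed.optim import ZeroRedundancyOptimizer

model = torch.nn.Linear(16, 16)                         # placeholder module
optimizer_class = partial(FusedAdam, adam_w_mode=True)  # what `_partial_: True` produces
optimizer = ZeroRedundancyOptimizer(
    model.parameters(),               # parameters come from the training loop, not the config
    optimizer_class=optimizer_class,
    lr=1e-3,                          # hypothetical value, merged in from elsewhere
)
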
# @package train.optimizer
_target_: apex.optimizers.FusedAdam
adam_w_mode: True
# @package train.optimizer
_target_: torch.distributed.optim.ZeroRedundancyOptimizer
_recursive_: True
optimizer_class:
  _target_: torch.optim.__getattribute__
  _args_:
    - "AdamW"
# @package train.optimizer
_target_: torch.optim.AdamW
# @package train.optimizer
_target_: deepspeed.ops.lamb.FusedLamb
# @package train.optimizer
_target_: apex.optimizers.FusedLAMB
# @package train.optimizer
_target_: torch.optim.SGD
# @package train.scheduler
_target_: src.optim.timm_lr_scheduler.TimmCosineLRScheduler
# @package train.scheduler
_target_: transformers.get_cosine_schedule_with_warmup
# @package train.scheduler
_target_: src.optim.lr_scheduler.InvSqrt
num_warmup_steps: ???
# @package train.scheduler
_target_: transformers.get_linear_schedule_with_warmup
# @package train.scheduler
_target_: torch.optim.lr_scheduler.MultiStepLR
# @package _global_
train:
  scheduler_interval: epoch
  scheduler_monitor: ???
  scheduler:
    _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
    factor: 0.2 # Decay factor when ReduceLROnPlateau is used
    patience: 20
    min_lr: 0.0 # Minimum learning rate during annealing
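
`ReduceLROnPlateau` needs a monitored metric, which is why this config also sets `scheduler_interval` and `scheduler_monitor` at the global `train` level (`???` marks a mandatory Hydra value that must be overridden before running). A minimal sketch of how these keys typically end up in a PyTorch Lightning scheduler dict (assumed wiring; the optimizer and metric name are placeholders):

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(16, 16)                    # placeholder module
optimizer = torch.optim.AdamW(model.parameters())  # placeholder optimizer
scheduler = ReduceLROnPlateau(optimizer, factor=0.2, patience=20, min_lr=0.0)

lr_scheduler_config = {
    "scheduler": scheduler,
    "interval": "epoch",    # from train.scheduler_interval
    "monitor": "val/loss",  # train.scheduler_monitor is `???` and must be set
}
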
# @package train.scheduler
_target_: transformers.get_polynomial_decay_schedule_with_warmup
# @package train.scheduler
_target_: torch.optim.lr_scheduler.StepLR
step_size: ???
_target_: src.tasks.seq.SequenceModel
_target_: pytorch_lightning.Trainer
# default values for all trainer parameters
checkpoint_callback: True
default_root_dir: null
gradient_clip_val: 0.0
process_position: 0
num_nodes: 1
num_processes: 1
gpus: null
auto_select_gpus: False
tpu_cores: null
log_gpu_memory: null
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1
fast_dev_run: False
accumulate_grad_batches: 1
max_epochs: 1
min_epochs: 1
max_steps: null
min_steps: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
val_check_interval: 1.0
flush_logs_every_n_steps: 100
log_every_n_steps: 50
accelerator: null
sync_batchnorm: False
precision: 32
weights_summary: "top"
weights_save_path: null
num_sanity_val_steps: 2
truncated_bptt_steps: null
resume_from_checkpoint: null
profiler: null
benchmark: False
deterministic: False
reload_dataloaders_every_epoch: False
auto_lr_find: False
replace_sampler_ddp: True
terminate_on_nan: False
auto_scale_batch_size: False
prepare_data_per_node: True
plugins: null
amp_backend: "native"
amp_level: "O2"
move_metrics_to_cpu: False
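
These keys appear to mirror the PyTorch Lightning 1.x `Trainer` signature; in a Hydra setup the whole node is usually passed through `hydra.utils.instantiate`, which calls the `_target_` with the remaining keys as keyword arguments. A small self-contained sketch (the inline config is a stand-in for the composed one):

from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "trainer": {"_target_": "pytorch_lightning.Trainer", "max_epochs": 1, "precision": 32}
})
trainer = instantiate(cfg.trainer)  # same as pytorch_lightning.Trainer(max_epochs=1, precision=32)
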
defaults:
  - default.yaml
accelerator: gpu
devices: 4
strategy: ddp
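
This profile composes on top of default.yaml through the Hydra defaults list and only overrides the distributed-training keys; the net effect is roughly the following Trainer call (a sketch, all other defaults omitted):

import pytorch_lightning as pl

trainer = pl.Trainer(accelerator="gpu", devices=4, strategy="ddp")
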
defaults:
  - default.yaml
gpus: 0
min_epochs: 1
max_epochs: 2
# prints
weights_summary: "full"
profiler: null
# debugs
fast_dev_run: true
num_sanity_val_steps: 2
overfit_batches: 0
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
track_grad_norm: -1
terminate_on_nan: true
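
The debug profile likewise composes onto default.yaml and trades speed for visibility: a single CPU pass through train/val/test with the full weight summary and NaN termination enabled. Roughly equivalent to (a sketch, remaining defaults omitted):

import pytorch_lightning as pl

trainer = pl.Trainer(
    gpus=0,                  # CPU only
    max_epochs=2,
    fast_dev_run=True,       # run one batch of train/val/test and exit
    weights_summary="full",  # print the full layer-by-layer summary
    terminate_on_nan=True,   # abort if the loss or weights become NaN
)
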
_target_: pytorch_lightning.Trainer
# set to `gpu` to train on GPU, or null to train on CPU only
accelerator: null
min_epochs: 1
max_epochs: 1000