{ "fp16": { "enabled": true, "loss_scale": 0, "loss_scale_window": 1000, "initial_scale_power": 16, "hysteresis": 2, "min_loss_scale": 1 }, "zero_optimization": { "stage": 3, "cpu_offload": true, "cpu_offload_params": true, "cpu_offload_use_pin_memory" : true, "overlap_comm": true, "contiguous_gradients": true, "sub_group_size": 1e14, "reduce_bucket_size": 0, "stage3_prefetch_bucket_size": 0, "stage3_param_persistence_threshold": 0, "stage3_max_live_parameters": 1e9, "stage3_max_reuse_distance": 1e9, "stage3_gather_fp16_weights_on_model_save": true }, "optimizer": { "type": "AdamW", "params": { "lr": 3e-5, "betas": [0.8, 0.999], "eps": 1e-8, "weight_decay": 3e-7 } }, "scheduler": { "type": "WarmupLR", "params": { "warmup_min_lr": 0, "warmup_max_lr": 3e-5, "warmup_num_steps": 500 } }, "steps_per_print": 2000, "wall_clock_breakdown": false }