{
  "train_batch_size": 512,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 1,
  "zero_optimization": {
    "stage": 0
  },
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.00015,
      "betas": [0.9, 0.95]
    }
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "wall_clock_breakdown": false,
  "zero_allow_untested_optimizer": false,
  "curriculum_learning": {
    "enabled": true,
    "curriculum_type": "seqlen",
    "min_difficulty": 8,
    "max_difficulty": 1024,
    "schedule_type": "fixed_linear",
    "schedule_config": {
      "total_curriculum_step": 60000,
      "difficulty_step": 8
    }
  }
}