{
  "train_batch_size": 2,
  "gradient_accumulation_steps": 1,
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.00015
    }
  },
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0
  }
}