{
  "fp16": {
    "enabled": false,
    "min_loss_scale": 1
  },
  "amp": {
    "enabled": false,
    "opt_level": "O2"
  },
  "bfloat16": {
    "enabled": true
  },
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 1e-3,
      "eps": 1e-5
    }
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu"
    },
    "contiguous_gradients": true
  },
  "activation_checkpointing": {
    "partition_activations": true,
    "cpu_checkpointing": false,
    "profile": false
  },
  "gradient_clipping": 0.1
}