"vscode:/vscode.git/clone" did not exist on "1ff119c7b7e2bc0ce0fdf06abaa2e9930421a750"
deepspeed_config.json 695 Bytes
Newer Older
Gustaf Ahdritz's avatar
Gustaf Ahdritz committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
{
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.001,
      "eps": 1e-05
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_min_lr": 0,
      "warmup_max_lr": 0.001,
      "warmup_num_steps": 128000
    }
  },
  "fp16": {
    "enabled": true,
    "min_loss_scale": 1
  },
  "amp": {
    "enabled": false,
    "opt_level": "O2"
  },
  "bfloat16": {
    "enabled": false
  },
  "zero_optimization": {
    "stage": 2,
    "cpu_offload": true,
    "contiguous_gradients": true
  },
  "activation_checkpointing": {
    "partition_activations": true,
    "cpu_checkpointing": false,
    "profile": false
  },
  "gradient_clipping": 0.1
}