"megatron/vscode:/vscode.git/clone" did not exist on "8dbd075743a185547b9eb1491771df754757de1b"
training_params.yaml 2.21 KB
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# @package _group_
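# training_params.yaml — default training hyper-parameters, laid out as a
# fairseq-style Hydra config group (hence the "# @package _group_" directive
# above). Keys and default values mirror fairseq's training arguments.

# common: options shared by every run — logging cadence, RNG seed, CPU vs. GPU
# execution, and mixed-precision (fp16) behaviour. fp16_init_scale,
# fp16_scale_window and min_loss_scale tune dynamic loss scaling;
# model_parallel_size > 1 only matters for model-parallel (Megatron-style) models.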
common:
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  seed: 1
  cpu: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 1.0e-4
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  checkpoint_suffix: ""
  quantization_config_path: null
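# distributed_training: multi-GPU / multi-node settings. distributed_backend is
# the torch.distributed backend (NCCL for GPU training) and ddp_backend "c10d"
# selects PyTorch's native DistributedDataParallel. The slowmo_* and
# localsgd_frequency options only take effect when distributed_wrapper is
# switched from "DDP" to "SlowMo".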
distributed_training:
  distributed_rank: 0
  distributed_backend: "nccl"
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  local_rank: 0
  distributed_no_spawn: false
  ddp_backend: "c10d"
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  broadcast_buffers: false
  distributed_wrapper: "DDP"
  slowmo_momentum: null
  slowmo_algorithm: "LocalSGD"
  localsgd_frequency: 3
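# dataset: data loading and batching. max_tokens caps each batch by token count,
# while batch_size fixes the number of samples per batch; required_batch_size_multiple
# pads batches to a multiple of 8 for tensor-core efficiency. The ${params.dataset.*}
# entries are OmegaConf interpolations resolved against the params.dataset node when
# the config is composed, so validation batching follows those same settings.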
dataset:
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: ${params.dataset.batch_size}
  required_batch_size_multiple: 8
  dataset_impl: null
  data_buffer_size: 10
  train_subset: "train"
  valid_subset: "valid"
  validate_interval: 1
  fixed_validation_seed: null
  disable_validation: false
  curriculum: 0
  gen_subset: "test"
  num_shards: 1
  shard_id: 0
  max_tokens_valid: ${params.dataset.max_tokens}
  batch_size_valid: ${params.dataset.batch_size}
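# optimization: training-loop limits and gradient handling. max_epoch / max_update
# of 0 mean "run until stopped" in fairseq; clip_norm is the gradient-norm clipping
# threshold; update_freq accumulates gradients over N batches before each optimizer
# step (simulating a larger batch); lr is a list so schedulers that take per-phase
# learning rates can be configured. use_bmuf switches the trainer to the BMUF
# distributed optimizer configured in the bmuf section below.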
optimization:
  max_epoch: 0
  max_update: 0
  clip_norm: 25.0
  sentence_avg: false
  update_freq: [1]
  lr: [0.25]
  min_lr: -1.0
  use_bmuf: false
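# checkpoint: where and how often checkpoints are written (per epoch via
# save_interval, per update via save_interval_updates), how many are kept
# (-1 means keep all), and which validation metric selects checkpoint_best.pt.
# patience enables early stopping on best_checkpoint_metric; -1 disables it.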
checkpoint:
  save_dir: "checkpoints"
  restore_file: "checkpoint_last.pt"
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: "{}"
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: "loss"
  maximize_best_checkpoint_metric: false
  patience: -1
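# bmuf: Block-wise Model Update Filtering, used only when optimization.use_bmuf
# is true. Workers run global_sync_iter local steps between synchronisations,
# and block_momentum / block_lr control how the averaged global update is applied.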
bmuf:
  block_lr: 1
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
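
# Usage sketch (assumption — the exact entry point depends on how this repository
# wires up Hydra): with fairseq's Hydra integration, values in this file can be
# overridden from the command line with dot-paths, e.g.
#
#   fairseq-hydra-train --config-dir <dir containing this file> \
#       --config-name training_params \
#       optimization.lr=[0.0005] optimization.max_update=100000 \
#       dataset.max_tokens=4096 common.fp16=true
#
# Treat the command above as an illustration, not this repository's actual CLI.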