# univnet.yaml
# This config contains the default values for training the UnivNet model on the LJSpeech dataset.
# To train the model on a different dataset, change the config values to match that dataset.
# Most dataset-specific arguments are at the head of the config file; see below.

# Experiment name; exp_manager.name picks it up through ${name}.
name: UnivNet

# Dataset locations. `???` is the OmegaConf marker for a mandatory value, so
# both entries have to be supplied before the config can be used.
train_dataset: ???
validation_datasets: ???

# Audio / mel-spectrogram settings; defaults for a dataset with sample_rate=22050.
# Every key in this stanza is read by model.preprocessor through ${...} interpolation.
sample_rate: 22050
window: hann
n_fft: 1024
n_window_size: 1024
n_window_stride: 256
n_mel_channels: 80
lowfreq: 0
highfreq: 8000

# Segment length and duration filters for the training data.
# NOTE(review): these keys are not referenced anywhere in this file; presumably
# the model/train_ds and model/validation_ds config groups read them — confirm.
train_n_segments: 8192
train_min_duration: 0.75
train_max_duration: null

# Same settings for the validation data.
val_n_segments: 66048
val_min_duration: 3
val_max_duration: null

# Hydra defaults list: each entry selects one option from a config group and
# composes it into this config. The entries below choose the `c32` generator
# option and the `train_ds` / `val_ds` dataset options, all under `model`.
# The list is order-sensitive; do not reorder entries.
# NOTE(review): no `_self_` entry is listed, so how these groups are ordered
# relative to this file's own keys depends on the Hydra version — confirm.
defaults:
  - model/generator: c32
  - model/train_ds: train_ds
  - model/validation_ds: val_ds

model:
  # Discriminator settings, split into two sub-configs: `mpd` and `mrd`.
  # NOTE(review): presumably the multi-period and multi-resolution
  # discriminators, going by the `periods` / `resolutions` keys — confirm.
  discriminator:
    mpd:
      use_spectral_norm: false
      lrelu_slope: 0.2
      kernel_size: 5
      stride: 3
      periods: [2, 3, 5, 7, 11]
    mrd:
      use_spectral_norm: false
      lrelu_slope: 0.2
      # One entry per resolution, each given as
      # (filter_length, hop_length, win_length).
      resolutions:
        - [1024, 120, 600]
        - [2048, 240, 1200]
        - [512, 50, 240]
  # Feature extractor. `_target_` names the class to build (Hydra convention)
  # and the remaining keys are passed to it as arguments. Values written as
  # ${...} are interpolated from the top-level audio settings of this file, so
  # change them there rather than here.
  preprocessor:
    _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures
    nfilt: ${n_mel_channels}
    lowfreq: ${lowfreq}
    highfreq: ${highfreq}
    n_fft: ${n_fft}
    n_window_size: ${n_window_size}
    n_window_stride: ${n_window_stride}
    pad_to: 0
    pad_value: -11.52
    sample_rate: ${sample_rate}
    window: ${window}
    normalize: null
    preemph: null
    dither: 0.0
    frame_splicing: 1
    log: true
    log_zero_guard_type: clamp
    # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such
    # as plain PyYAML resolve a bare `1e-05` to a string instead of a float.
    log_zero_guard_value: 1.0e-05
    mag_power: 1.0
    use_grads: false
    exact_pad: true

  # Optimizer: `_target_` names the optimizer class, the other keys are its
  # arguments.
  optim:
    _target_: torch.optim.AdamW
    lr: 0.0001
    betas:
      - 0.5
      - 0.9

  # Total number of training steps; trainer.max_steps is interpolated from
  # this value, so change it here only.
  max_steps: 1000000
  # NOTE(review): presumably loss-weight / denoising hyperparameters consumed
  # by the model code, which is not visible in this file — confirm.
  stft_lamb: 2.5
  denoise_strength: 0.0025

# Trainer arguments (presumably forwarded to the PyTorch Lightning Trainer —
# confirm against the training script). Checkpointing and logging are switched
# off here because exp_manager provides both.
trainer:
  num_nodes: 1
  devices: 1
  accelerator: gpu
  strategy: ddp_find_unused_parameters_true
  precision: 32
  max_steps: ${model.max_steps}  # Defined once under model.max_steps
  accumulate_grad_batches: 1
  enable_checkpointing: false  # Provided by exp_manager
  logger: false  # Provided by exp_manager
  log_every_n_steps: 100
  check_val_every_n_epoch: 10
  benchmark: false

# Experiment manager settings. It supplies the checkpointing and logging that
# the trainer section disables.
exp_manager:
  # Experiment identity and output location.
  name: ${name}
  exp_dir: null

  # Resume behaviour.
  resume_if_exists: false
  resume_ignore_no_checkpoint: false

  # Checkpointing: track `val_loss`, where lower is better (mode: min).
  create_checkpoint_callback: true
  checkpoint_callback_params:
    mode: min
    monitor: val_loss

  # Loggers: TensorBoard on, Weights & Biases off. The wandb kwargs are left
  # unset (null) in this default config.
  create_tensorboard_logger: true
  create_wandb_logger: false
  wandb_logger_kwargs:
    entity: null
    name: null
    project: null