# Audio processing configs

# Settings for audio preprocessing, feature extraction and feature
# normalization. Units are not stated in this file; the notes below that
# mention Hz, samples or dB are assumptions to confirm against the consumer.
audio_config:
  # --- Preprocessing ---
  wav_normalize: true
  trim_silence: true
  # NOTE(review): presumably only used when trim_silence is true, and
  # presumably a positive dB distance below some reference level — confirm.
  trim_silence_threshold_db: 60
  preemphasize: false

  # --- Feature extraction ---
  sampling_rate: 48000
  # If hop_length and win_length are in samples, then at 48000 samples/s:
  #   hop_length 600  -> 12.5 ms frame shift
  #   win_length 2400 -> 50 ms analysis window
  hop_length: 600
  win_length: 2400
  # n_fft is larger than win_length; presumably each frame is zero-padded
  # up to n_fft before the transform — confirm against the consumer.
  n_fft: 4096
  n_mels: 128
  # Frequency range; if in Hz, fmax is half of the 24000 Hz Nyquist limit
  # implied by sampling_rate.
  fmin: 0.0
  fmax: 12000.0
  phone_level_feature: true

  # --- Normalization ---
  norm_type: "mean_std"  # "mean_std" or "global"
  max_norm: 1.0
  symmetric: false
  min_level_db: -100.0
  # NOTE(review): written as an integer while min_level_db is a float; left
  # as-is so the parsed type does not change.
  ref_level_db: 20

  # --- Runtime ---
  # NOTE(review): presumably the number of parallel worker processes used
  # by the processing pipeline — confirm against the consumer.
  num_workers: 16