---
# sambert_16k_MAS.yaml — SAMBERT (16 kHz) training configuration with MAS alignment
model_type: sambert
Model:
#########################################################
#         SAMBERT NETWORK ARCHITECTURE SETTING          #
#########################################################
  KanTtsSAMBERT:
    params:
        max_len: 800

        embedding_dim: 512
        encoder_num_layers: 8
        encoder_num_heads: 8
        encoder_num_units: 128
        encoder_ffn_inner_dim: 1024
        encoder_dropout: 0.1
        encoder_attention_dropout: 0.1
        encoder_relu_dropout: 0.1
        encoder_projection_units: 32

        speaker_units: 32
        emotion_units: 32

        predictor_filter_size: 41
        predictor_fsmn_num_layers: 3
        predictor_num_memory_units: 128
        predictor_ffn_inner_dim: 256
        predictor_dropout: 0.1
        predictor_shift: 0
        predictor_lstm_units: 128
        dur_pred_prenet_units: [128, 128]
        dur_pred_lstm_units: 128

        decoder_prenet_units: [256, 256]
        decoder_num_layers: 12
        decoder_num_heads: 8
        decoder_num_units: 128
        decoder_ffn_inner_dim: 1024
        decoder_dropout: 0.1
        decoder_attention_dropout: 0.1
        decoder_relu_dropout: 0.1

        outputs_per_step: 3
        num_mels: 80

        postnet_filter_size: 41
        postnet_fsmn_num_layers: 4
        postnet_num_memory_units: 256
        postnet_ffn_inner_dim: 512
        postnet_dropout: 0.1
        postnet_shift: 17
        postnet_lstm_units: 128

        MAS: true

    optimizer:
      type: Adam
      params:
        lr: 0.001
        betas: [0.9, 0.98]
        eps: 1.0e-9
        weight_decay: 0.0
    scheduler:
      type: NoamLR
      params:
        warmup_steps: 4000

linguistic_unit:
  cleaners: english_cleaners
  lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
  speaker_list: F7
####################################################
#                   LOSS SETTING                   #
####################################################
Loss:
  MelReconLoss:
    enable: true
    params:
      loss_type: mae

  ProsodyReconLoss:
    enable: true
    params:
      loss_type: mae

  AttentionCTCLoss:
    enable: true

  AttentionBinarizationLoss:
    enable: true
    params:
      start_epoch: 0
      warmup_epoch: 100


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 32
pin_memory: false
num_workers: 4  # FIXME: setting this > 0 may hang on macOS
remove_short_samples: false
allow_cache: true

grad_norm: 1.0

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
train_max_steps: 1000000           # Number of training steps.
save_interval_steps: 20000         # Interval steps to save checkpoint.
eval_interval_steps: 10000          # Interval steps to evaluate the network.
log_interval_steps: 1000            # Interval steps to record the training log.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.