Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
parler-tts
Commits
226fe07f
Commit
226fe07f
authored
Feb 21, 2024
by
Yoach Lacombe
Browse files
update example config + add script to init dummy model
parent
fc66e60b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
6 deletions
+52
-6
example_configs/librispeech_tts_r.json
example_configs/librispeech_tts_r.json
+9
-6
init_dummy_model.py
init_dummy_model.py
+43
-0
No files found.
example_configs/librispeech_tts_r.json
View file @
226fe07f
{
"model_name_or_path"
:
"ylacombe/musicgen-melody"
,
"model_name_or_path"
:
"/home/yoach/dataspeech/artefacts/tiny-model/"
,
"feature_extractor_name"
:
"facebook/encodec_24khz"
,
"description_tokenizer_name"
:
"t5-base"
,
"prompt_tokenizer_name"
:
"t5-base"
,
"push_to_hub"
:
false
,
"hub_model_id"
:
"stable-speech-mini"
,
...
...
@@ -18,8 +21,8 @@
"eval_split_name"
:
"test.clean+test.other"
,
"target_audio_column_name"
:
"audio"
,
"description_column_name"
:
"text"
,
"prompt_column_name"
:
"text_description"
,
"description_column_name"
:
"text_description"
,
"prompt_column_name"
:
"text"
,
"max_train_samples"
:
1000
,
"max_eval_samples"
:
200
,
...
...
@@ -31,7 +34,7 @@
"add_audio_samples_to_wandb"
:
true
,
"id_column_name"
:
"id"
,
"preprocessing_num_workers"
:
24
,
"preprocessing_num_workers"
:
1
,
"pad_token_id"
:
2048
,
"decoder_start_token_id"
:
2048
,
...
...
@@ -40,7 +43,7 @@
"num_train_epochs"
:
20
,
"gradient_accumulation_steps"
:
1
,
"gradient_checkpointing"
:
true
,
"per_device_train_batch_size"
:
2
,
"per_device_train_batch_size"
:
16
,
"learning_rate"
:
1e-6
,
"adam_beta1"
:
0.9
,
"adam_beta2"
:
0.95
,
...
...
@@ -53,7 +56,7 @@
"predict_with_generate"
:
true
,
"include_inputs_for_metrics"
:
true
,
"evaluation_strategy"
:
"epoch"
,
"per_device_eval_batch_size"
:
2
,
"per_device_eval_batch_size"
:
16
,
"generation_max_length"
:
400
,
"fp16"
:
true
,
...
...
init_dummy_model.py
0 → 100644
View file @
226fe07f
from
stable_speech
import
StableSpeechConfig
,
StableSpeechForCausalLM
,
StableSpeechForConditionalGeneration
,
StableSpeechDecoderConfig
from
transformers
import
T5Config
,
EncodecConfig
# Script body: build a small decoder, save it, combine it with pretrained
# text/audio encoders into a full model, set generation defaults, and save
# the resulting model to disk.

# Output locations. The decoder directory is written first and then read back
# when assembling the full model.
DECODER_DIR = "/home/yoach/dataspeech/artefacts/decoder/"
TINY_MODEL_DIR = "/home/yoach/dataspeech/artefacts/tiny-model/"

# Hyper-parameters of the dummy decoder (2 layers, 256-wide, 4 heads).
decoder_kwargs = {
    "max_position_embeddings": 1024,
    "num_hidden_layers": 2,
    "ffn_dim": 256,
    "num_attention_heads": 4,
    "layerdrop": 0.0,
    "use_cache": True,
    "activation_function": "gelu",
    "hidden_size": 256,
    "dropout": 0.1,
    "attention_dropout": 0.1,
    "activation_dropout": 0.1,
}
decoder_config = StableSpeechDecoderConfig(**decoder_kwargs)

# TODO (carried over from the original script): "?? how to make it stop ?"
# NOTE(review): presumably about a stopping criterion for generation - confirm.

# Instantiate the decoder from the config and persist it so that it can be
# loaded by path below.
decoder = StableSpeechForCausalLM(decoder_config)
decoder.save_pretrained(DECODER_DIR)

# Assemble the full model from the two pretrained encoders and the decoder
# that was just saved.
model = StableSpeechForConditionalGeneration.from_sub_models_pretrained(
    text_encoder_pretrained_model_name_or_path="t5-base",
    audio_encoder_pretrained_model_name_or_path="facebook/encodec_32khz",
    decoder_pretrained_model_name_or_path=DECODER_DIR,
)

generation_config = model.generation_config

# Set the appropriate bos/pad token ids.
generation_config.decoder_start_token_id = 2048
generation_config.pad_token_id = 2048

# Set other default generation config params.
# max_length is 30 times the audio encoder's frame rate, truncated to an int
# (presumably 30 seconds worth of frames - TODO confirm).
generation_config.max_length = int(30 * model.audio_encoder.config.frame_rate)
generation_config.do_sample = True
generation_config.guidance_scale = 3.0

model.save_pretrained(TINY_MODEL_DIR)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment