Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
parler-tts
Commits
c734f3ec
Commit
c734f3ec
authored
Apr 08, 2024
by
Yoach Lacombe
Browse files
make style
parent
a664e0ca
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
8 additions
and
18 deletions
+8
-18
helpers/gradio_demo/app.py
helpers/gradio_demo/app.py
+1
-3
helpers/model_init_scripts/init_dummy_model.py
helpers/model_init_scripts/init_dummy_model.py
+2
-5
helpers/model_init_scripts/init_dummy_model_with_encodec.py
helpers/model_init_scripts/init_dummy_model_with_encodec.py
+1
-2
helpers/model_init_scripts/init_model_300M.py
helpers/model_init_scripts/init_model_300M.py
+3
-7
parler_tts/__init__.py
parler_tts/__init__.py
+1
-1
No files found.
helpers/gradio_demo/app.py
View file @
c734f3ec
...
...
@@ -95,9 +95,7 @@ with gr.Blocks(css=css) as block:
description
=
gr
.
Textbox
(
label
=
"Description"
,
lines
=
2
,
value
=
""
,
elem_id
=
"input_description"
)
run_button
=
gr
.
Button
(
"Generate Audio"
,
variant
=
"primary"
)
with
gr
.
Column
():
audio_out
=
gr
.
Audio
(
label
=
"Parler-TTS generation"
,
type
=
"numpy"
,
elem_id
=
"audio_out"
)
audio_out
=
gr
.
Audio
(
label
=
"Parler-TTS generation"
,
type
=
"numpy"
,
elem_id
=
"audio_out"
)
inputs
=
[
input_text
,
description
]
outputs
=
[
audio_out
]
...
...
helpers/model_init_scripts/init_dummy_model.py
View file @
c734f3ec
...
...
@@ -5,11 +5,11 @@ import argparse
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"save_directory"
,
type
=
str
,
help
=
"Directory where to save the model and the decoder."
)
parser
.
add_argument
(
"text_model"
,
type
=
str
,
help
=
"Repository id or path to the text encoder."
)
parser
.
add_argument
(
"audio_model"
,
type
=
str
,
help
=
"Repository id or path to the audio encoder."
)
args
=
parser
.
parse_args
()
text_model
=
args
.
text_model
...
...
@@ -22,7 +22,6 @@ if __name__ == "__main__":
num_codebooks
=
encodec
.
num_codebooks
print
(
"num_codebooks"
,
num_codebooks
)
decoder_config
=
ParlerTTSDecoderConfig
(
vocab_size
=
encodec_vocab_size
+
1
,
max_position_embeddings
=
2048
,
...
...
@@ -42,11 +41,9 @@ if __name__ == "__main__":
num_codebooks
=
num_codebooks
,
)
decoder
=
ParlerTTSForCausalLM
(
decoder_config
)
decoder
.
save_pretrained
(
os
.
path
.
join
(
args
.
save_directory
,
"decoder"
))
model
=
ParlerTTSForConditionalGeneration
.
from_sub_models_pretrained
(
text_encoder_pretrained_model_name_or_path
=
text_model
,
audio_encoder_pretrained_model_name_or_path
=
encodec_version
,
...
...
helpers/model_init_scripts/init_dummy_model_with_encodec.py
View file @
c734f3ec
...
...
@@ -4,7 +4,7 @@ import os
import
argparse
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"save_directory"
,
type
=
str
,
help
=
"Directory where to save the model and the decoder."
)
args
=
parser
.
parse_args
()
...
...
@@ -41,7 +41,6 @@ if __name__ == "__main__":
decoder
.
save_pretrained
(
os
.
path
.
join
(
args
.
save_directory
,
"decoder"
))
model
=
ParlerTTSForConditionalGeneration
.
from_sub_models_pretrained
(
text_encoder_pretrained_model_name_or_path
=
text_model
,
audio_encoder_pretrained_model_name_or_path
=
encodec_version
,
...
...
helpers/model_init_scripts/init_model_300M.py
View file @
c734f3ec
...
...
@@ -5,11 +5,11 @@ import argparse
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"save_directory"
,
type
=
str
,
help
=
"Directory where to save the model and the decoder."
)
parser
.
add_argument
(
"text_model"
,
type
=
str
,
help
=
"Repository id or path to the text encoder."
)
parser
.
add_argument
(
"audio_model"
,
type
=
str
,
help
=
"Repository id or path to the audio encoder."
)
args
=
parser
.
parse_args
()
text_model
=
args
.
text_model
...
...
@@ -22,7 +22,6 @@ if __name__ == "__main__":
num_codebooks
=
encodec
.
num_codebooks
print
(
"num_codebooks"
,
num_codebooks
)
decoder_config
=
ParlerTTSDecoderConfig
(
vocab_size
=
encodec_vocab_size
+
64
,
# + 64 instead of +1 to have a multiple of 64
max_position_embeddings
=
4096
,
# 30 s = 2580
...
...
@@ -42,11 +41,9 @@ if __name__ == "__main__":
num_codebooks
=
num_codebooks
,
)
decoder
=
ParlerTTSForCausalLM
(
decoder_config
)
decoder
.
save_pretrained
(
os
.
path
.
join
(
args
.
save_directory
,
"decoder"
))
model
=
ParlerTTSForConditionalGeneration
.
from_sub_models_pretrained
(
text_encoder_pretrained_model_name_or_path
=
text_model
,
audio_encoder_pretrained_model_name_or_path
=
encodec_version
,
...
...
@@ -64,5 +61,4 @@ if __name__ == "__main__":
model
.
generation_config
.
do_sample
=
True
# True
model
.
generation_config
.
guidance_scale
=
1
# 3.0
model
.
save_pretrained
(
os
.
path
.
join
(
args
.
save_directory
,
"stable-speech-untrained-300M/"
))
model
.
save_pretrained
(
os
.
path
.
join
(
args
.
save_directory
,
"stable-speech-untrained-300M/"
))
parler_tts/__init__.py
View file @
c734f3ec
...
...
@@ -13,4 +13,4 @@ from .dac_wrapper import DACConfig, DACModel
from
transformers
import
AutoConfig
,
AutoModel
AutoConfig
.
register
(
"dac"
,
DACConfig
)
AutoModel
.
register
(
DACConfig
,
DACModel
)
\ No newline at end of file
AutoModel
.
register
(
DACConfig
,
DACModel
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment