# Run F5-TTS inference with input0.wav as the reference audio.
# Every option and every value must be its own shell word: adjacent quoted
# and unquoted text with no whitespace in between is joined into a single
# argument, so the CLI would never see the individual options.
f5-tts_infer-cli \
  --model "F5-TTS" \
  --load_vocoder_from_local \
  --ref_audio "input0.wav" \
  --ref_text "The content, subtitle or transcription of reference audio." \
  --gen_text "Some text you want TTS model generate for you."
# Disabled alternative: same reference audio, no --ref_text, different text.
# NOTE(review): presumably the tool transcribes the reference audio itself
# when --ref_text is omitted — confirm against the F5-TTS docs.
# f5-tts_infer-cli --model "F5-TTS" --load_vocoder_from_local --ref_audio "input0.wav" --gen_text "Even when running the whisper-base, it's still very slow, compared to what it should be (on a 4090 GPU)"
# Run F5-TTS inference with input.wav as the reference audio.
# Options and values are separated by whitespace so that each one reaches
# the CLI as a distinct argument; without the spaces the shell concatenates
# the whole option list into one word.
f5-tts_infer-cli \
  --model "F5-TTS" \
  --load_vocoder_from_local \
  --ref_audio "input.wav" \
  --ref_text "The content, subtitle or transcription of reference audio." \
  --gen_text "Some text you want TTS model generate for you."
# Disabled alternative: same reference audio, no --ref_text, different text.
# NOTE(review): presumably the tool transcribes the reference audio itself
# when --ref_text is omitted — confirm against the F5-TTS docs.
# f5-tts_infer-cli --model "F5-TTS" --load_vocoder_from_local --ref_audio "input.wav" --gen_text "Even when running the whisper-base, it's still very slow, compared to what it should be (on a 4090 GPU)"