# Generate the prompt from a speech recording: run the VQGAN inference
# script on example.wav with the firefly generator checkpoint.
vqgan_script="fish_speech/models/vqgan/inference.py"
generator_ckpt="checkpoints/fish-speech-1.5/firefly-gan-vq-fsq-8x1024-21hz-generator.pth"
python "$vqgan_script" -i "example.wav" --checkpoint-path "$generator_ckpt"

# Generate semantic tokens from text.
#   synth_text     - the sentence to synthesize.
#   reference_text - placeholder; replace with the transcript of the
#                    reference audio.
# NOTE(review): fake.npy is presumably the prompt-token file produced by the
# prompt-generation step — confirm.
# Optional: append `--compile` to the command below to enable that flag; it is
# intentionally left off here.
synth_text="富人优先考虑的都是利益,而穷人优先考虑的永远都是感情和面子,穷人是小心翼翼的大方,而富人却是大大方方的小气。 "
reference_text="The text corresponding to reference audio"
python fish_speech/models/text2semantic/inference.py \
    --text "$synth_text" \
    --prompt-text "$reference_text" \
    --prompt-tokens "fake.npy" \
    --checkpoint-path "checkpoints/fish-speech-1.5" \
    --num-samples 1

# Generate the voice audio from semantic tokens: run the VQGAN inference
# script on the token file with the firefly generator checkpoint.
# NOTE(review): temp/codes_0.npy is presumably the output of the
# text-to-semantic step — confirm.
codes_file="temp/codes_0.npy"
decoder_ckpt="checkpoints/fish-speech-1.5/firefly-gan-vq-fsq-8x1024-21hz-generator.pth"
python fish_speech/models/vqgan/inference.py -i "$codes_file" --checkpoint-path "$decoder_ckpt"