.gitignore .gitmodules LICENSE README.md requirements_ds_gpu.txt setup.py web_demo.py web_demo_stream.py Kimi-Audio-Evalkit/run_vita_audio.sh Kimi-Audio-Evalkit/almeval/models/vita_audio.py asset/2631296891109983590.wav asset/379838640-d5ff0815-74f8-4738-b0f1-477cfc8dcc2d.wav asset/4202818730519913143.wav asset/logo.png asset/piano.mp3 asset/qa_speed.gif asset/tts_speed.gif asset/vita-audio_logo.jpg asset/wechat-group.jpg asset/介绍一下上海.wav asset/他本是我莲花池里养大的金鱼.wav asset/发表一个悲伤的演讲.wav asset/发表一个振奋人心的演讲.wav configs/sts_finetune_stage1.yaml configs/sts_finetune_stage2.yaml evaluation/compute-acc-of-contain.py evaluation/compute-cer.py evaluation/compute-wer.py evaluation/evaluate_asr.py evaluation/evaluate_libritts.py evaluation/evaluate_seedtts.py evaluation/evaluate_sqa.py evaluation/get_chat_template.py scripts/set_env_ds_gpu.sh scripts/deepspeed/ds_config_zero1.json scripts/deepspeed/ds_config_zero2.json scripts/deepspeed/ds_config_zero2_no_optimizer.json scripts/deepspeed/ds_config_zero2_offload.json scripts/deepspeed/ds_config_zero3.json scripts/deepspeed/ds_config_zero3_offload.json scripts/deepspeed/evaluate_sts.sh scripts/deepspeed/sts_qwen25/finetune_glm4voice_mtp10_stage1.sh scripts/deepspeed/sts_qwen25/finetune_glm4voice_mtp10_stage2.sh scripts/deepspeed/sts_qwen25/finetune_glm4voice_mtp1_stage1.sh scripts/deepspeed/sts_qwen25/finetune_glm4voice_stage1.sh scripts/deepspeed/sts_qwen25/finetune_sensevoice_glm4voice_mtp10_stage1.sh scripts/deepspeed/sts_qwen25/finetune_sensevoice_glm4voice_mtp10_stage2.sh scripts/deepspeed/sts_qwen25/finetune_sensevoice_glm4voice_mtp1_stage1.sh scripts/deepspeed/sts_qwen25/finetune_sensevoice_glm4voice_stage1.sh tools/finetune_sts_v4_48_3.py tools/get_neural_audio_codecs.py tools/inference_sts.py tools/trainer_v4_48_3.py vita_audio/__init__.py vita_audio/constants.py vita_audio/tokenizer.py vita_audio/tokenizer_cosyvoice2.py vita_audio/tokenizer_glm4voice.py vita_audio/tokenizer_sensevoice_glm4voice.py vita_audio/tokenizer_sensevoice_sparktts.py vita_audio/tokenizer_snac.py vita_audio.egg-info/PKG-INFO vita_audio.egg-info/SOURCES.txt vita_audio.egg-info/dependency_links.txt vita_audio.egg-info/top_level.txt vita_audio/data/__init__.py vita_audio/data/build.py vita_audio/data/data_collator.py vita_audio/data/dataset_base.py vita_audio/data/dataset_cosyvoice2.py vita_audio/data/dataset_deepseek.py vita_audio/data/dataset_hunyuan.py vita_audio/data/dataset_llama3.py vita_audio/data/dataset_mistral.py vita_audio/data/dataset_qwen2.py vita_audio/data/utils.py vita_audio/data/processor/__init__.py vita_audio/data/processor/audio_processor.py vita_audio/data/processor/image_processor.py vita_audio/models/__init__.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/__init__.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/config_7B_mtp0.json vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/config_7B_mtp1.json vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/config_7B_mtp10.json vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/configuration_qwen2.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/modeling_qwen2.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/modeling_sensevoice.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/modular_qwen2.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/resampler_projector.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/tokenization_qwen2.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/tokenization_qwen2_fast.py vita_audio/models/qwen2_mtp_sensevoice_v4_48_3/tokenizer_config.json vita_audio/models/qwen2_mtp_v4_48_3/__init__.py vita_audio/models/qwen2_mtp_v4_48_3/config_7B_mtp1.json vita_audio/models/qwen2_mtp_v4_48_3/config_7B_mtp10.json vita_audio/models/qwen2_mtp_v4_48_3/configuration_qwen2.py vita_audio/models/qwen2_mtp_v4_48_3/generation_config.json vita_audio/models/qwen2_mtp_v4_48_3/modeling_qwen2.py vita_audio/models/qwen2_mtp_v4_48_3/modular_qwen2.py vita_audio/models/qwen2_mtp_v4_48_3/tokenization_qwen2.py vita_audio/models/qwen2_mtp_v4_48_3/tokenization_qwen2_fast.py vita_audio/models/qwen2_mtp_v4_48_3/tokenizer_config.json vita_audio/models/qwen2_v4_48_3/__init__.py vita_audio/models/qwen2_v4_48_3/configuration_qwen2.py vita_audio/models/qwen2_v4_48_3/modeling_qwen2.py vita_audio/models/qwen2_v4_48_3/modular_qwen2.py vita_audio/models/qwen2_v4_48_3/tokenization_qwen2.py vita_audio/models/qwen2_v4_48_3/tokenization_qwen2_fast.py web/parms.py web/pem.py web/pool.py web/queue.py web/requirements.txt web/vad.py web/resources/index.html web/resources/silero_vad.jit web/resources/silero_vad.onnx