"vscode:/vscode.git/clone" did not exist on "8f5b7fce849b64216068c087207343c81c3776ed"
Commit aedfc254 authored by weishb
Browse files

更新vllm版本到0.18.1,搭配vllm-omni 0.18.0版本

parent 84b8a706
...@@ -21,11 +21,11 @@ Qwen3-TTS 覆盖10种主要语言(中文、英文、日文、韩文、德文 ...@@ -21,11 +21,11 @@ Qwen3-TTS 覆盖10种主要语言(中文、英文、日文、韩文、德文
| DTK | 26.04 | | DTK | 26.04 |
| python | 3.10.12 | | python | 3.10.12 |
| transformers | 4.57.6 | | transformers | 4.57.6 |
| vllm | 0.15.1+das.opt1.alpha.dtk2604 | | vllm | 0.18.1+das.dtk2604 |
| torchaudio | torchaudio-2.9.0+das.opt1.dtk2604.20260206.g275d08c2 | | torchaudio | 2.10.0 |
| vllm-omni | 0.15.1+fix1 | | vllm-omni | 0.18.0 |
推荐使用镜像:harbor.sourcefind.cn:5443/dcu/admin/base/custom:vllm0.15.1-ubuntu22.04-dtk26.04-0130-py3.10-20260220 推荐使用镜像:harbor.sourcefind.cn:5443/dcu/admin/base/custom:vllm018-ubuntu22.04-dtk26.04-nemotron-20260422
```bash ```bash
docker run -it \ docker run -it \
...@@ -42,7 +42,7 @@ docker run -it \ ...@@ -42,7 +42,7 @@ docker run -it \
-u root \ -u root \
-v /opt/hyhal/:/opt/hyhal/:ro \ -v /opt/hyhal/:/opt/hyhal/:ro \
-v /path/your_code_data/:/path/your_code_data/ \ -v /path/your_code_data/:/path/your_code_data/ \
harbor.sourcefind.cn:5443/dcu/admin/base/custom:vllm0.15.1-ubuntu22.04-dtk26.04-0130-py3.10-20260220 bash harbor.sourcefind.cn:5443/dcu/admin/base/custom:vllm018-ubuntu22.04-dtk26.04-nemotron-20260422 bash
``` ```
更多镜像可前往[光源](https://sourcefind.cn/#/service-list)下载使用。 更多镜像可前往[光源](https://sourcefind.cn/#/service-list)下载使用。
...@@ -53,11 +53,8 @@ pip install -r requirements.txt ...@@ -53,11 +53,8 @@ pip install -r requirements.txt
镜像内其他环境配置 镜像内其他环境配置
``` ```
pip uninstall vllm pip install vllm_omni==0.18.0
pip install vllm-0.15.1+das.opt1.alpha.dtk2604-cp310-cp310-linux_x86_64.whl --no-deps pip install torchaudio-2.10.0-cp310-cp310-linux_x86_64.whl --no-deps
pip install vllm_omni-0.15.1+fix1-py3-none-any.whl
pip install torchaudio-2.9.0+das.opt1.dtk2604.20260206.g275d08c2-cp310-cp310-linux_x86_64.whl --no-deps
pip install pycountry
``` ```
......
async_chunk: true
stage_args: stage_args:
- stage_id: 0 - stage_id: 0
stage_type: llm # Use llm stage type to launch OmniLLM stage_type: llm
is_comprehension: true
runtime: runtime:
devices: "0" devices: "0"
max_batch_size: 1
engine_args: engine_args:
model_stage: qwen3_tts model_stage: qwen3_tts
model_arch: Qwen3TTSForConditionalGeneration max_num_seqs: 10
model_arch: Qwen3TTSTalkerForConditionalGeneration
worker_type: ar
scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler
enforce_eager: false
trust_remote_code: true
async_scheduling: true
enable_prefix_caching: false
engine_output_type: latent
gpu_memory_utilization: 0.3
distributed_executor_backend: "mp"
max_num_batched_tokens: 512
max_model_len: 4096
custom_process_next_stage_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_tts.talker2code2wav_async_chunk
# Use a named connector so that the settings under runtime.connectors.extra are applied.
output_connectors:
to_stage_1: connector_of_shared_memory
default_sampling_params:
temperature: 0.9
top_k: 50
max_tokens: 4096
seed: 42
detokenize: false
repetition_penalty: 1.05
stop_token_ids: [2150]
- stage_id: 1
stage_type: llm
runtime:
devices: "0"
engine_args:
model_stage: code2wav
max_num_seqs: 1
model_arch: Qwen3TTSCode2Wav
worker_type: generation worker_type: generation
scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler
enforce_eager: true enforce_eager: true
trust_remote_code: true trust_remote_code: true
async_scheduling: false async_scheduling: true
enable_prefix_caching: false enable_prefix_caching: false
engine_output_type: audio # Final output: audio waveform engine_output_type: audio
gpu_memory_utilization: 0.1 gpu_memory_utilization: 0.3
distributed_executor_backend: "mp" distributed_executor_backend: "mp"
max_num_batched_tokens: 1000000 # Must be divisible by num_code_groups and cover (left_context + chunk).
max_num_batched_tokens: 8192
# async_chunk appends windows at every step, so max_model_len must cover the accumulated stream.
max_model_len: 32768
engine_input_source: [0]
final_output: true final_output: true
final_output_type: audio final_output_type: audio
# Distributed connector configuration
input_connectors:
from_stage_0: connector_of_shared_memory
tts_args:
max_instructions_length: 500
default_sampling_params:
temperature: 0.0
top_p: 1.0
top_k: -1
max_tokens: 65536
seed: 42
detokenize: true
repetition_penalty: 1.0
runtime:
enabled: true
defaults:
window_size: -1
max_inflight: 1
connectors:
connector_of_shared_memory:
name: SharedMemoryConnector
extra:
shm_threshold_bytes: 65536
# Frame-aligned codec streaming transport.
codec_streaming: true
# Connector polling / timeout: the max-wait values are loop counts; the sleep interval is in seconds.
connector_get_sleep_s: 0.01
connector_get_max_wait_first_chunk: 3000
connector_get_max_wait: 300
# Align with Omni: small chunks with sufficient context overlap.
codec_chunk_frames: 25
codec_left_context_frames: 25
edges:
- from: 0
to: 1
window_size: -1
\ No newline at end of file
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment