---
# Pipeline stage configuration: single LLM stage running the Qwen3 TTS model.
stage_args:
  - stage_id: 0
    stage_type: llm  # Use llm stage type to launch OmniLLM
    # Runtime placement/batching for this stage.
    runtime:
      devices: "0"  # quoted: digit-string device id, not an int
      max_batch_size: 1
    # Arguments forwarded to the underlying engine.
    engine_args:
      model_stage: qwen3_tts
      model_arch: Qwen3TTSForConditionalGeneration
      worker_type: generation
      scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler
      enforce_eager: true
      trust_remote_code: true
      async_scheduling: false
      enable_prefix_caching: false
      engine_output_type: audio  # Final output: audio waveform
      gpu_memory_utilization: 0.1
      distributed_executor_backend: "mp"
      max_num_batched_tokens: 1000000
    # Marks this stage's output as the pipeline's final result.
    final_output: true
    final_output_type: audio