# Summary

## Entry Points

Main entry points for vLLM-Omni inference and serving.

- [vllm_omni.entrypoints.async_omni.AsyncOmni][]
- [vllm_omni.entrypoints.async_omni_diffusion.AsyncOmniDiffusion][]
- [vllm_omni.entrypoints.async_omni_llm.AsyncOmniLLM][]
- [vllm_omni.entrypoints.chat_utils.OmniAsyncMultiModalContentParser][]
- [vllm_omni.entrypoints.chat_utils.OmniAsyncMultiModalItemTracker][]
- [vllm_omni.entrypoints.chat_utils.parse_chat_messages_futures][]
- [vllm_omni.entrypoints.cli.serve.OmniServeCommand][]
- [vllm_omni.entrypoints.client_request_state.ClientRequestState][]
- [vllm_omni.entrypoints.log_utils.OrchestratorMetrics][]
- [vllm_omni.entrypoints.log_utils.StageRequestMetrics][]
- [vllm_omni.entrypoints.log_utils.StageStats][]
- [vllm_omni.entrypoints.omni.Omni][]
- [vllm_omni.entrypoints.omni.OmniBase][]
- [vllm_omni.entrypoints.omni_diffusion.OmniDiffusion][]
- [vllm_omni.entrypoints.omni_llm.OmniLLM][]
- [vllm_omni.entrypoints.omni_stage.OmniStage][]
- [vllm_omni.entrypoints.stage_utils.OmniStageTaskType][]

## Inputs

Data structures for multi-modal prompts and inputs.

- [vllm_omni.inputs.data.OmniEmbedsPrompt][]
- [vllm_omni.inputs.data.OmniTokenInputs][]
- [vllm_omni.inputs.data.OmniTokensPrompt][]
- [vllm_omni.inputs.parse.parse_singleton_prompt_omni][]
- [vllm_omni.inputs.preprocess.OmniInputPreprocessor][]

## Engine

Engine classes for offline and online inference.

- [vllm_omni.diffusion.diffusion_engine.DiffusionEngine][]
- [vllm_omni.engine.AdditionalInformationEntry][]
- [vllm_omni.engine.AdditionalInformationPayload][]
- [vllm_omni.engine.OmniEngineCoreOutput][]
- [vllm_omni.engine.OmniEngineCoreOutputs][]
- [vllm_omni.engine.OmniEngineCoreRequest][]
- [vllm_omni.engine.PromptEmbedsPayload][]
- [vllm_omni.engine.arg_utils.AsyncOmniEngineArgs][]
- [vllm_omni.engine.arg_utils.OmniEngineArgs][]
- [vllm_omni.engine.input_processor.OmniInputProcessor][]
- [vllm_omni.engine.output_processor.MultimodalOutputProcessor][]
- [vllm_omni.engine.output_processor.OmniRequestState][]

## Core

Core scheduling and caching components.

- [vllm_omni.core.sched.omni_ar_scheduler.KVCacheTransferData][]
- [vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler][]
- [vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler][]
- [vllm_omni.core.sched.output.OmniCachedRequestData][]
- [vllm_omni.core.sched.output.OmniNewRequestData][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.vq.core_vq.DistributedGroupResidualVectorQuantization][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.vq.core_vq.DistributedResidualVectorQuantization][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.vq.core_vq.EuclideanCodebook][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.vq.core_vq.VectorQuantization][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.vq.core_vq.preprocess][]

## Configuration

Configuration classes.

- [vllm_omni.config.model.OmniModelConfig][]
- [vllm_omni.diffusion.cache.teacache.config.TeaCacheConfig][]
- [vllm_omni.distributed.omni_connectors.utils.config.ConnectorSpec][]
- [vllm_omni.distributed.omni_connectors.utils.config.OmniTransferConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.configuration_qwen3_tts.Qwen3TTSConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.configuration_qwen3_tts.Qwen3TTSSpeakerEncoderConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.configuration_qwen3_tts.Qwen3TTSTalkerCodePredictorConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.configuration_qwen3_tts.Qwen3TTSTalkerConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_12hz.configuration_qwen3_tts_tokenizer_v2.Qwen3TTSTokenizerV2Config][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_12hz.configuration_qwen3_tts_tokenizer_v2.Qwen3TTSTokenizerV2DecoderConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.configuration_qwen3_tts_tokenizer_v1.Qwen3TTSTokenizerV1Config][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.configuration_qwen3_tts_tokenizer_v1.Qwen3TTSTokenizerV1DecoderBigVGANConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.configuration_qwen3_tts_tokenizer_v1.Qwen3TTSTokenizerV1DecoderConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.configuration_qwen3_tts_tokenizer_v1.Qwen3TTSTokenizerV1DecoderDiTConfig][]
- [vllm_omni.model_executor.models.qwen3_tts.tokenizer_25hz.configuration_qwen3_tts_tokenizer_v1.Qwen3TTSTokenizerV1EncoderConfig][]

## Workers

Worker classes and model runners for distributed inference.

- [vllm_omni.diffusion.worker.gpu_diffusion_model_runner.GPUDiffusionModelRunner][]
- [vllm_omni.diffusion.worker.gpu_diffusion_worker.GPUDiffusionWorker][]
- [vllm_omni.diffusion.worker.gpu_diffusion_worker.WorkerProc][]
- [vllm_omni.diffusion.worker.npu.npu_worker.NPUWorker][]
- [vllm_omni.diffusion.worker.npu.npu_worker.NPUWorkerProc][]
- [vllm_omni.worker.gpu_ar_model_runner.ExecuteModelState][]
- [vllm_omni.worker.gpu_ar_model_runner.GPUARModelRunner][]
- [vllm_omni.worker.gpu_ar_worker.GPUARWorker][]
- [vllm_omni.worker.gpu_generation_model_runner.GPUGenerationModelRunner][]
- [vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker][]
- [vllm_omni.worker.gpu_model_runner.OmniGPUModelRunner][]
- [vllm_omni.worker.npu.npu_ar_model_runner.ExecuteModelState][]
- [vllm_omni.worker.npu.npu_ar_model_runner.NPUARModelRunner][]
- [vllm_omni.worker.npu.npu_ar_worker.NPUARWorker][]
- [vllm_omni.worker.npu.npu_generation_model_runner.NPUGenerationModelRunner][]
- [vllm_omni.worker.npu.npu_generation_worker.NPUGenerationWorker][]
- [vllm_omni.worker.npu.npu_model_runner.OmniNPUModelRunner][]