Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4bdf7ac5
Unverified
Commit
4bdf7ac5
authored
Oct 09, 2025
by
Cyrus Leung
Committed by
GitHub
Oct 09, 2025
Browse files
[Bugfix] Fix SHM cache initialization (#26427)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
dc7976dd
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
54 additions
and
66 deletions
+54
-66
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+1
-4
vllm/entrypoints/openai/serving_score.py
vllm/entrypoints/openai/serving_score.py
+0
-3
vllm/entrypoints/openai/serving_tokenization.py
vllm/entrypoints/openai/serving_tokenization.py
+0
-3
vllm/entrypoints/openai/serving_transcription.py
vllm/entrypoints/openai/serving_transcription.py
+0
-5
vllm/entrypoints/openai/speech_to_text.py
vllm/entrypoints/openai/speech_to_text.py
+1
-4
vllm/inputs/data.py
vllm/inputs/data.py
+4
-4
vllm/inputs/preprocess.py
vllm/inputs/preprocess.py
+7
-10
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+19
-16
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+20
-17
vllm/v1/engine/processor.py
vllm/v1/engine/processor.py
+2
-0
No files found.
vllm/entrypoints/openai/serving_responses.py
View file @
4bdf7ac5
...
@@ -49,7 +49,6 @@ from openai.types.responses.response_reasoning_item import (
...
@@ -49,7 +49,6 @@ from openai.types.responses.response_reasoning_item import (
from
openai_harmony
import
Message
as
OpenAIHarmonyMessage
from
openai_harmony
import
Message
as
OpenAIHarmonyMessage
from
vllm
import
envs
from
vllm
import
envs
from
vllm.config
import
ModelConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.chat_utils
import
(
from
vllm.entrypoints.chat_utils
import
(
ChatCompletionMessageParam
,
ChatCompletionMessageParam
,
...
@@ -109,7 +108,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -109,7 +108,6 @@ class OpenAIServingResponses(OpenAIServing):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -127,7 +125,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -127,7 +125,6 @@ class OpenAIServingResponses(OpenAIServing):
)
->
None
:
)
->
None
:
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
...
@@ -176,7 +173,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -176,7 +173,7 @@ class OpenAIServingResponses(OpenAIServing):
"the store."
"the store."
)
)
self
.
use_harmony
=
model_config
.
hf_config
.
model_type
==
"gpt_oss"
self
.
use_harmony
=
self
.
model_config
.
hf_config
.
model_type
==
"gpt_oss"
if
self
.
use_harmony
:
if
self
.
use_harmony
:
logger
.
warning
(
logger
.
warning
(
"For gpt-oss, we ignore --enable-auto-tool-choice "
"For gpt-oss, we ignore --enable-auto-tool-choice "
...
...
vllm/entrypoints/openai/serving_score.py
View file @
4bdf7ac5
...
@@ -7,7 +7,6 @@ from typing import Any, Optional, Union
...
@@ -7,7 +7,6 @@ from typing import Any, Optional, Union
from
fastapi
import
Request
from
fastapi
import
Request
from
vllm.config
import
ModelConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
...
@@ -47,7 +46,6 @@ class ServingScores(OpenAIServing):
...
@@ -47,7 +46,6 @@ class ServingScores(OpenAIServing):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -55,7 +53,6 @@ class ServingScores(OpenAIServing):
...
@@ -55,7 +53,6 @@ class ServingScores(OpenAIServing):
)
->
None
:
)
->
None
:
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
log_error_stack
=
log_error_stack
,
log_error_stack
=
log_error_stack
,
...
...
vllm/entrypoints/openai/serving_tokenization.py
View file @
4bdf7ac5
...
@@ -6,7 +6,6 @@ from typing import Any, Final, Optional, Union
...
@@ -6,7 +6,6 @@ from typing import Any, Final, Optional, Union
import
jinja2
import
jinja2
from
fastapi
import
Request
from
fastapi
import
Request
from
vllm.config
import
ModelConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.chat_utils
import
ChatTemplateContentFormatOption
from
vllm.entrypoints.chat_utils
import
ChatTemplateContentFormatOption
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.logger
import
RequestLogger
...
@@ -32,7 +31,6 @@ class OpenAIServingTokenization(OpenAIServing):
...
@@ -32,7 +31,6 @@ class OpenAIServingTokenization(OpenAIServing):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -43,7 +41,6 @@ class OpenAIServingTokenization(OpenAIServing):
...
@@ -43,7 +41,6 @@ class OpenAIServingTokenization(OpenAIServing):
)
->
None
:
)
->
None
:
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
log_error_stack
=
log_error_stack
,
log_error_stack
=
log_error_stack
,
...
...
vllm/entrypoints/openai/serving_transcription.py
View file @
4bdf7ac5
...
@@ -5,7 +5,6 @@ from typing import Optional, Union
...
@@ -5,7 +5,6 @@ from typing import Optional, Union
from
fastapi
import
Request
from
fastapi
import
Request
from
vllm.config
import
ModelConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
...
@@ -34,7 +33,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
...
@@ -34,7 +33,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -43,7 +41,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
...
@@ -43,7 +41,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
):
):
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
...
@@ -95,7 +92,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
...
@@ -95,7 +92,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -104,7 +100,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
...
@@ -104,7 +100,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
):
):
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
...
...
vllm/entrypoints/openai/speech_to_text.py
View file @
4bdf7ac5
...
@@ -12,7 +12,6 @@ import numpy as np
...
@@ -12,7 +12,6 @@ import numpy as np
from
fastapi
import
Request
from
fastapi
import
Request
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.config
import
ModelConfig
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
...
@@ -53,7 +52,6 @@ class OpenAISpeechToText(OpenAIServing):
...
@@ -53,7 +52,6 @@ class OpenAISpeechToText(OpenAIServing):
def
__init__
(
def
__init__
(
self
,
self
,
engine_client
:
EngineClient
,
engine_client
:
EngineClient
,
model_config
:
ModelConfig
,
models
:
OpenAIServingModels
,
models
:
OpenAIServingModels
,
*
,
*
,
request_logger
:
Optional
[
RequestLogger
],
request_logger
:
Optional
[
RequestLogger
],
...
@@ -63,7 +61,6 @@ class OpenAISpeechToText(OpenAIServing):
...
@@ -63,7 +61,6 @@ class OpenAISpeechToText(OpenAIServing):
):
):
super
().
__init__
(
super
().
__init__
(
engine_client
=
engine_client
,
engine_client
=
engine_client
,
model_config
=
model_config
,
models
=
models
,
models
=
models
,
request_logger
=
request_logger
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
...
@@ -74,7 +71,7 @@ class OpenAISpeechToText(OpenAIServing):
...
@@ -74,7 +71,7 @@ class OpenAISpeechToText(OpenAIServing):
self
.
task_type
=
task_type
self
.
task_type
=
task_type
self
.
asr_config
=
self
.
model_cls
.
get_speech_to_text_config
(
self
.
asr_config
=
self
.
model_cls
.
get_speech_to_text_config
(
model_config
,
task_type
self
.
model_config
,
task_type
)
)
self
.
max_audio_filesize_mb
=
envs
.
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB
self
.
max_audio_filesize_mb
=
envs
.
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB
...
...
vllm/inputs/data.py
View file @
4bdf7ac5
...
@@ -20,13 +20,13 @@ class TextPrompt(TypedDict):
...
@@ -20,13 +20,13 @@ class TextPrompt(TypedDict):
prompt
:
str
prompt
:
str
"""The input text to be tokenized before passing to the model."""
"""The input text to be tokenized before passing to the model."""
multi_modal_data
:
NotRequired
[
"MultiModalDataDict"
]
multi_modal_data
:
NotRequired
[
Optional
[
"MultiModalDataDict"
]
]
"""
"""
Optional multi-modal data to pass to the model,
Optional multi-modal data to pass to the model,
if the model supports it.
if the model supports it.
"""
"""
mm_processor_kwargs
:
NotRequired
[
dict
[
str
,
Any
]]
mm_processor_kwargs
:
NotRequired
[
Optional
[
dict
[
str
,
Any
]]
]
"""
"""
Optional multi-modal processor kwargs to be forwarded to the
Optional multi-modal processor kwargs to be forwarded to the
multimodal input mapper & processor. Note that if multiple modalities
multimodal input mapper & processor. Note that if multiple modalities
...
@@ -61,13 +61,13 @@ class TokensPrompt(TypedDict):
...
@@ -61,13 +61,13 @@ class TokensPrompt(TypedDict):
token_type_ids
:
NotRequired
[
list
[
int
]]
token_type_ids
:
NotRequired
[
list
[
int
]]
"""A list of token type IDs to pass to the cross encoder model."""
"""A list of token type IDs to pass to the cross encoder model."""
multi_modal_data
:
NotRequired
[
"MultiModalDataDict"
]
multi_modal_data
:
NotRequired
[
Optional
[
"MultiModalDataDict"
]
]
"""
"""
Optional multi-modal data to pass to the model,
Optional multi-modal data to pass to the model,
if the model supports it.
if the model supports it.
"""
"""
mm_processor_kwargs
:
NotRequired
[
dict
[
str
,
Any
]]
mm_processor_kwargs
:
NotRequired
[
Optional
[
dict
[
str
,
Any
]]
]
"""
"""
Optional multi-modal processor kwargs to be forwarded to the
Optional multi-modal processor kwargs to be forwarded to the
multimodal input mapper & processor. Note that if multiple modalities
multimodal input mapper & processor. Note that if multiple modalities
...
...
vllm/inputs/preprocess.py
View file @
4bdf7ac5
...
@@ -17,7 +17,7 @@ from vllm.multimodal.inputs import (
...
@@ -17,7 +17,7 @@ from vllm.multimodal.inputs import (
MultiModalUUIDDict
,
MultiModalUUIDDict
,
)
)
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
init_tokenizer_from_configs
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils.jsontree
import
json_iter_leaves
from
vllm.utils.jsontree
import
json_iter_leaves
from
.data
import
(
from
.data
import
(
...
@@ -45,20 +45,17 @@ class InputPreprocessor:
...
@@ -45,20 +45,17 @@ class InputPreprocessor:
def
__init__
(
def
__init__
(
self
,
self
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
tokenizer
:
Optional
[
AnyTokenizer
],
mm_registry
:
MultiModalRegistry
=
MULTIMODAL_REGISTRY
,
mm_registry
:
MultiModalRegistry
=
MULTIMODAL_REGISTRY
,
mm_processor_cache
:
Optional
[
BaseMultiModalProcessorCache
]
=
None
,
mm_processor_cache
:
Optional
[
BaseMultiModalProcessorCache
]
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
model_config
=
model_config
self
.
model_config
=
model_config
self
.
tokenizer
=
tokenizer
self
.
mm_registry
=
mm_registry
self
.
mm_registry
=
mm_registry
self
.
mm_processor_cache
=
mm_processor_cache
self
.
mm_processor_cache
=
mm_processor_cache
if
model_config
.
skip_tokenizer_init
:
self
.
tokenizer
=
None
else
:
self
.
tokenizer
=
init_tokenizer_from_configs
(
model_config
)
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
if
self
.
tokenizer
is
None
:
if
self
.
tokenizer
is
None
:
raise
ValueError
(
raise
ValueError
(
...
@@ -351,8 +348,8 @@ class InputPreprocessor:
...
@@ -351,8 +348,8 @@ class InputPreprocessor:
if
self
.
model_config
.
is_multimodal_model
:
if
self
.
model_config
.
is_multimodal_model
:
inputs
=
self
.
_process_multimodal
(
inputs
=
self
.
_process_multimodal
(
prompt_token_ids
,
prompt_token_ids
,
parsed_content
.
get
(
"multi_modal_data"
,
{}
)
,
parsed_content
.
get
(
"multi_modal_data"
)
or
{},
parsed_content
.
get
(
"mm_processor_kwargs"
),
parsed_content
.
get
(
"mm_processor_kwargs"
)
or
{}
,
tokenization_kwargs
=
tokenization_kwargs
,
tokenization_kwargs
=
tokenization_kwargs
,
mm_uuids
=
mm_uuids
,
mm_uuids
=
mm_uuids
,
)
)
...
@@ -380,8 +377,8 @@ class InputPreprocessor:
...
@@ -380,8 +377,8 @@ class InputPreprocessor:
if
self
.
model_config
.
is_multimodal_model
:
if
self
.
model_config
.
is_multimodal_model
:
inputs
=
self
.
_process_multimodal
(
inputs
=
self
.
_process_multimodal
(
prompt_text
,
prompt_text
,
parsed_content
.
get
(
"multi_modal_data"
,
{}
)
,
parsed_content
.
get
(
"multi_modal_data"
)
or
{},
parsed_content
.
get
(
"mm_processor_kwargs"
),
parsed_content
.
get
(
"mm_processor_kwargs"
)
or
{}
,
tokenization_kwargs
=
tokenization_kwargs
,
tokenization_kwargs
=
tokenization_kwargs
,
mm_uuids
=
mm_uuids
,
mm_uuids
=
mm_uuids
,
)
)
...
...
vllm/v1/engine/async_llm.py
View file @
4bdf7ac5
...
@@ -12,23 +12,23 @@ import numpy as np
...
@@ -12,23 +12,23 @@ import numpy as np
import
torch
import
torch
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.protocol
import
EngineClient
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.utils
import
_validate_truncation_size
from
vllm.entrypoints.utils
import
_validate_truncation_size
from
vllm.envs
import
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
from
vllm.envs
import
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
from
vllm.inputs
import
PromptType
from
vllm.inputs
import
PromptType
from
vllm.inputs.preprocess
import
InputPreprocessor
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.outputs
import
PoolingRequestOutput
,
RequestOutput
from
vllm.outputs
import
PoolingRequestOutput
,
RequestOutput
from
vllm.plugins.io_processors
import
get_io_processor
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tasks
import
SupportedTask
from
vllm.tasks
import
SupportedTask
from
vllm.tracing
import
init_tracer
from
vllm.tracing
import
init_tracer
from
vllm.transformers_utils.config
import
maybe_register_config_serialize_by_value
from
vllm.transformers_utils.config
import
maybe_register_config_serialize_by_value
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
init_tokenizer_from_configs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
Device
,
as_list
,
cancel_task_threadsafe
,
cdiv
,
deprecate_kwargs
from
vllm.utils
import
Device
,
as_list
,
cancel_task_threadsafe
,
cdiv
,
deprecate_kwargs
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine
import
EngineCoreRequest
...
@@ -104,8 +104,16 @@ class AsyncLLM(EngineClient):
...
@@ -104,8 +104,16 @@ class AsyncLLM(EngineClient):
"logger list; enabling logging without default stat loggers"
"logger list; enabling logging without default stat loggers"
)
)
# Processor (converts Inputs --> EngineCoreRequests).
if
self
.
model_config
.
skip_tokenizer_init
:
self
.
processor
=
Processor
(
vllm_config
,
mm_registry
=
mm_registry
)
tokenizer
=
None
else
:
tokenizer
=
init_tokenizer_from_configs
(
self
.
model_config
)
self
.
processor
=
Processor
(
self
.
vllm_config
,
tokenizer
)
self
.
io_processor
=
get_io_processor
(
self
.
vllm_config
,
self
.
model_config
.
io_processor_plugin
,
)
# OutputProcessor (converts EngineCoreOutputs --> RequestOutput).
# OutputProcessor (converts EngineCoreOutputs --> RequestOutput).
self
.
output_processor
=
OutputProcessor
(
self
.
output_processor
=
OutputProcessor
(
...
@@ -245,10 +253,6 @@ class AsyncLLM(EngineClient):
...
@@ -245,10 +253,6 @@ class AsyncLLM(EngineClient):
cancel_task_threadsafe
(
getattr
(
self
,
"output_handler"
,
None
))
cancel_task_threadsafe
(
getattr
(
self
,
"output_handler"
,
None
))
@
property
def
tokenizer
(
self
)
->
Optional
[
AnyTokenizer
]:
return
self
.
processor
.
tokenizer
async
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
async
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
return
await
self
.
engine_core
.
get_supported_tasks_async
()
return
await
self
.
engine_core
.
get_supported_tasks_async
()
...
@@ -615,14 +619,13 @@ class AsyncLLM(EngineClient):
...
@@ -615,14 +619,13 @@ class AsyncLLM(EngineClient):
logger
.
info
(
"Request %s failed."
,
request_id
)
logger
.
info
(
"Request %s failed."
,
request_id
)
raise
EngineGenerateError
()
from
e
raise
EngineGenerateError
()
from
e
async
def
get_vllm_config
(
self
)
->
VllmConfig
:
@
property
return
self
.
vllm_config
def
tokenizer
(
self
)
->
Optional
[
AnyTokenizer
]:
return
self
.
processor
.
tokenizer
async
def
get_model_config
(
self
)
->
ModelConfig
:
return
self
.
model_config
async
def
get_input_preprocessor
(
self
)
->
InputPreprocessor
:
@
tokenizer
.
setter
return
self
.
processor
.
input_preprocessor
def
tokenizer
(
self
,
tokenizer
:
Optional
[
AnyTokenizer
])
->
None
:
self
.
processor
.
tokenizer
=
tokenizer
async
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
async
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
if
self
.
tokenizer
is
None
:
if
self
.
tokenizer
is
None
:
...
...
vllm/v1/engine/llm_engine.py
View file @
4bdf7ac5
...
@@ -19,11 +19,12 @@ from vllm.logger import init_logger
...
@@ -19,11 +19,12 @@ from vllm.logger import init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.outputs
import
PoolingRequestOutput
,
RequestOutput
from
vllm.outputs
import
PoolingRequestOutput
,
RequestOutput
from
vllm.plugins.io_processors
import
get_io_processor
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tasks
import
SupportedTask
from
vllm.tasks
import
SupportedTask
from
vllm.tracing
import
init_tracer
from
vllm.tracing
import
init_tracer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
init_tokenizer_from_configs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
Device
from
vllm.utils
import
Device
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine
import
EngineCoreRequest
...
@@ -95,8 +96,16 @@ class LLMEngine:
...
@@ -95,8 +96,16 @@ class LLMEngine:
self
.
dp_group
=
None
self
.
dp_group
=
None
self
.
should_execute_dummy_batch
=
False
self
.
should_execute_dummy_batch
=
False
# Processor (convert Inputs --> EngineCoreRequests)
if
self
.
model_config
.
skip_tokenizer_init
:
self
.
processor
=
Processor
(
vllm_config
,
mm_registry
=
mm_registry
)
tokenizer
=
None
else
:
tokenizer
=
init_tokenizer_from_configs
(
self
.
model_config
)
self
.
processor
=
Processor
(
self
.
vllm_config
,
tokenizer
)
self
.
io_processor
=
get_io_processor
(
self
.
vllm_config
,
self
.
model_config
.
io_processor_plugin
,
)
# OutputProcessor (convert EngineCoreOutputs --> RequestOutput).
# OutputProcessor (convert EngineCoreOutputs --> RequestOutput).
self
.
output_processor
=
OutputProcessor
(
self
.
output_processor
=
OutputProcessor
(
...
@@ -204,14 +213,6 @@ class LLMEngine:
...
@@ -204,14 +213,6 @@ class LLMEngine:
def
validate_outputs
(
cls
,
outputs
,
output_type
):
def
validate_outputs
(
cls
,
outputs
,
output_type
):
return
outputs
return
outputs
@
property
def
tokenizer
(
self
)
->
Optional
[
AnyTokenizer
]:
return
self
.
processor
.
tokenizer
@
tokenizer
.
setter
def
tokenizer
(
self
,
tokenizer
:
Optional
[
AnyTokenizer
])
->
None
:
self
.
processor
.
tokenizer
=
tokenizer
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
return
self
.
engine_core
.
get_supported_tasks
()
return
self
.
engine_core
.
get_supported_tasks
()
...
@@ -313,12 +314,6 @@ class LLMEngine:
...
@@ -313,12 +314,6 @@ class LLMEngine:
return
processed_outputs
.
request_outputs
return
processed_outputs
.
request_outputs
def
get_vllm_config
(
self
):
return
self
.
vllm_config
def
get_model_config
(
self
):
return
self
.
model_config
def
start_profile
(
self
):
def
start_profile
(
self
):
self
.
engine_core
.
profile
(
True
)
self
.
engine_core
.
profile
(
True
)
...
@@ -345,6 +340,14 @@ class LLMEngine:
...
@@ -345,6 +340,14 @@ class LLMEngine:
assert
self
.
log_stats
,
"Stat logging disabled"
assert
self
.
log_stats
,
"Stat logging disabled"
return
get_metrics_snapshot
()
return
get_metrics_snapshot
()
@
property
def
tokenizer
(
self
)
->
Optional
[
AnyTokenizer
]:
return
self
.
processor
.
tokenizer
@
tokenizer
.
setter
def
tokenizer
(
self
,
tokenizer
:
Optional
[
AnyTokenizer
])
->
None
:
self
.
processor
.
tokenizer
=
tokenizer
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
if
self
.
tokenizer
is
None
:
if
self
.
tokenizer
is
None
:
raise
ValueError
(
raise
ValueError
(
...
...
vllm/v1/engine/processor.py
View file @
4bdf7ac5
...
@@ -37,6 +37,7 @@ class Processor:
...
@@ -37,6 +37,7 @@ class Processor:
def
__init__
(
def
__init__
(
self
,
self
,
vllm_config
:
VllmConfig
,
vllm_config
:
VllmConfig
,
tokenizer
:
Optional
[
AnyTokenizer
],
mm_registry
:
MultiModalRegistry
=
MULTIMODAL_REGISTRY
,
mm_registry
:
MultiModalRegistry
=
MULTIMODAL_REGISTRY
,
)
->
None
:
)
->
None
:
self
.
vllm_config
=
vllm_config
self
.
vllm_config
=
vllm_config
...
@@ -52,6 +53,7 @@ class Processor:
...
@@ -52,6 +53,7 @@ class Processor:
self
.
input_preprocessor
=
InputPreprocessor
(
self
.
input_preprocessor
=
InputPreprocessor
(
self
.
model_config
,
self
.
model_config
,
tokenizer
,
mm_registry
,
mm_registry
,
mm_processor_cache
=
self
.
mm_processor_cache
,
mm_processor_cache
=
self
.
mm_processor_cache
,
)
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment