Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7eb4a51c
Unverified
Commit
7eb4a51c
authored
Aug 09, 2024
by
Cyrus Leung
Committed by
GitHub
Aug 09, 2024
Browse files
[Core] Support serving encoder/decoder models (#7258)
parent
0fa14907
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
40 additions
and
70 deletions
+40
-70
vllm/model_executor/models/interfaces.py
vllm/model_executor/models/interfaces.py
+11
-11
vllm/multimodal/image.py
vllm/multimodal/image.py
+4
-2
vllm/sequence.py
vllm/sequence.py
+1
-1
vllm/utils.py
vllm/utils.py
+22
-52
vllm/worker/worker.py
vllm/worker/worker.py
+2
-4
No files found.
vllm/model_executor/models/interfaces.py
View file @
7eb4a51c
from
typing
import
(
ClassVar
,
Dict
,
List
,
Literal
,
Optional
,
Protocol
,
Type
,
Union
,
overload
,
runtime_checkable
)
from
typing_extensions
import
Type
Guard
from
typing_extensions
import
Type
Is
from
vllm.config
import
LoRAConfig
,
MultiModalConfig
,
SchedulerConfig
from
vllm.logger
import
init_logger
...
...
@@ -37,18 +37,18 @@ class _SupportsVisionType(Protocol):
@
overload
def
supports_vision
(
model
:
Type
[
object
])
->
Type
Guard
[
Type
[
SupportsVision
]]:
def
supports_vision
(
model
:
Type
[
object
])
->
Type
Is
[
Type
[
SupportsVision
]]:
...
@
overload
def
supports_vision
(
model
:
object
)
->
Type
Guard
[
SupportsVision
]:
def
supports_vision
(
model
:
object
)
->
Type
Is
[
SupportsVision
]:
...
def
supports_vision
(
model
:
Union
[
Type
[
object
],
object
],
)
->
Union
[
Type
Guard
[
Type
[
SupportsVision
]],
Type
Guard
[
SupportsVision
]]:
)
->
Union
[
Type
Is
[
Type
[
SupportsVision
]],
Type
Is
[
SupportsVision
]]:
if
isinstance
(
model
,
type
):
return
isinstance
(
model
,
_SupportsVisionType
)
...
...
@@ -94,18 +94,18 @@ class _SupportsLoRAType(Protocol):
@
overload
def
supports_lora
(
model
:
Type
[
object
])
->
Type
Guard
[
Type
[
SupportsLoRA
]]:
def
supports_lora
(
model
:
Type
[
object
])
->
Type
Is
[
Type
[
SupportsLoRA
]]:
...
@
overload
def
supports_lora
(
model
:
object
)
->
Type
Guard
[
SupportsLoRA
]:
def
supports_lora
(
model
:
object
)
->
Type
Is
[
SupportsLoRA
]:
...
def
supports_lora
(
model
:
Union
[
Type
[
object
],
object
],
)
->
Union
[
Type
Guard
[
Type
[
SupportsLoRA
]],
Type
Guard
[
SupportsLoRA
]]:
)
->
Union
[
Type
Is
[
Type
[
SupportsLoRA
]],
Type
Is
[
SupportsLoRA
]]:
result
=
_supports_lora
(
model
)
if
not
result
:
...
...
@@ -137,7 +137,7 @@ def supports_lora(
def
_supports_lora
(
model
:
Union
[
Type
[
object
],
object
],
)
->
Union
[
Type
Guard
[
Type
[
SupportsLoRA
]],
Type
Guard
[
SupportsLoRA
]]:
)
->
Union
[
Type
Is
[
Type
[
SupportsLoRA
]],
Type
Is
[
SupportsLoRA
]]:
if
isinstance
(
model
,
type
):
return
isinstance
(
model
,
_SupportsLoRAType
)
...
...
@@ -172,18 +172,18 @@ class _HasInnerStateType(Protocol):
@
overload
def
has_inner_state
(
model
:
object
)
->
Type
Guard
[
HasInnerState
]:
def
has_inner_state
(
model
:
object
)
->
Type
Is
[
HasInnerState
]:
...
@
overload
def
has_inner_state
(
model
:
Type
[
object
])
->
Type
Guard
[
Type
[
HasInnerState
]]:
def
has_inner_state
(
model
:
Type
[
object
])
->
Type
Is
[
Type
[
HasInnerState
]]:
...
def
has_inner_state
(
model
:
Union
[
Type
[
object
],
object
]
)
->
Union
[
Type
Guard
[
Type
[
HasInnerState
]],
Type
Guard
[
HasInnerState
]]:
)
->
Union
[
Type
Is
[
Type
[
HasInnerState
]],
Type
Is
[
HasInnerState
]]:
if
isinstance
(
model
,
type
):
return
isinstance
(
model
,
_HasInnerStateType
)
...
...
vllm/multimodal/image.py
View file @
7eb4a51c
...
...
@@ -10,6 +10,7 @@ from vllm.inputs.registry import InputContext
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.image_processor
import
get_image_processor
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
vllm.utils
import
is_list_of
from
.base
import
MultiModalInputs
,
MultiModalPlugin
...
...
@@ -113,7 +114,8 @@ class ImagePlugin(MultiModalPlugin):
def
_default_input_mapper
(
self
,
ctx
:
InputContext
,
data
:
object
)
->
MultiModalInputs
:
model_config
=
ctx
.
model_config
if
isinstance
(
data
,
(
Image
.
Image
,
list
)):
if
isinstance
(
data
,
Image
.
Image
)
or
is_list_of
(
data
,
Image
.
Image
):
image_processor
=
self
.
_get_hf_image_processor
(
model_config
)
if
image_processor
is
None
:
raise
RuntimeError
(
"No HuggingFace processor is available "
...
...
@@ -127,7 +129,7 @@ class ImagePlugin(MultiModalPlugin):
raise
return
MultiModalInputs
(
batch_data
)
elif
isinstance
(
data
,
torch
.
Tensor
):
elif
isinstance
(
data
,
torch
.
Tensor
)
or
is_list_of
(
data
,
torch
.
Tensor
):
raise
NotImplementedError
(
"Embeddings input is not supported yet"
)
raise
TypeError
(
f
"Invalid image type:
{
type
(
data
)
}
"
)
...
...
vllm/sequence.py
View file @
7eb4a51c
...
...
@@ -11,7 +11,7 @@ from typing import (TYPE_CHECKING, Dict, List, Mapping, Optional, Set, Tuple,
import
torch
from
vllm.inputs
import
is_valid_encoder_decoder_llm_inputs
from
vllm.inputs
.parse
import
is_valid_encoder_decoder_llm_inputs
from
vllm.lora.request
import
LoRARequest
from
vllm.pooling_params
import
PoolingParams
from
vllm.prompt_adapter.request
import
PromptAdapterRequest
...
...
vllm/utils.py
View file @
7eb4a51c
...
...
@@ -17,8 +17,8 @@ from collections import defaultdict
from
functools
import
lru_cache
,
partial
,
wraps
from
platform
import
uname
from
typing
import
(
Any
,
AsyncGenerator
,
Awaitable
,
Callable
,
Dict
,
Generic
,
Hashable
,
List
,
Optional
,
OrderedDict
,
Set
,
Tuple
,
TypeVar
,
Union
,
overload
)
Hashable
,
List
,
Literal
,
Optional
,
OrderedDict
,
Set
,
Tuple
,
Type
,
TypeVar
,
Union
,
overload
)
from
uuid
import
uuid4
import
numpy
as
np
...
...
@@ -26,12 +26,10 @@ import numpy.typing as npt
import
psutil
import
torch
import
torch.types
from
typing_extensions
import
ParamSpec
from
typing_extensions
import
ParamSpec
,
TypeIs
,
assert_never
import
vllm.envs
as
envs
from
vllm
import
_custom_ops
as
ops
from
vllm.inputs
import
(
ExplicitEncoderDecoderPrompt
,
PromptInputs
,
SingletonPromptInputs
)
from
vllm.logger
import
enable_trace_function_call
,
init_logger
logger
=
init_logger
(
__name__
)
...
...
@@ -812,6 +810,24 @@ def get_dtype_size(dtype: torch.dtype) -> int:
return
torch
.
tensor
([],
dtype
=
dtype
).
element_size
()
# `collections` helpers
def
is_list_of
(
value
:
object
,
typ
:
Type
[
T
],
*
,
check
:
Literal
[
"first"
,
"all"
]
=
"first"
,
)
->
TypeIs
[
List
[
T
]]:
if
not
isinstance
(
value
,
list
):
return
False
if
check
==
"first"
:
return
len
(
value
)
==
0
or
isinstance
(
value
[
0
],
typ
)
elif
check
==
"all"
:
return
all
(
isinstance
(
v
,
typ
)
for
v
in
value
)
assert_never
(
check
)
def
merge_dicts
(
dict1
:
Dict
[
K
,
List
[
T
]],
dict2
:
Dict
[
K
,
List
[
T
]])
->
Dict
[
K
,
List
[
T
]]:
"""Merge 2 dicts that have key -> List of items.
...
...
@@ -959,6 +975,7 @@ def enable_trace_function_call_for_thread() -> None:
enable_trace_function_call
(
log_path
)
# `functools` helpers
def
identity
(
value
:
T
)
->
T
:
return
value
...
...
@@ -1080,50 +1097,3 @@ async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args,
"""Utility function to run async task in a lock"""
async
with
lock
:
return
await
task
(
*
args
,
**
kwargs
)
def
is_encoder_decoder_model_config
(
model_config
)
->
bool
:
'''
Extract the HF encoder/decoder model flag from the ModelConfig instance.
Return False if model_config is None.
'''
return
model_config
is
not
None
and
\
getattr
(
model_config
.
hf_config
,
"is_encoder_decoder"
,
False
)
def
is_embedding_model_config
(
model_config
)
->
bool
:
'''
Extract the embedding model flag from the ModelConfig instance.
Return False if model_config is None.
'''
return
model_config
is
not
None
and
\
model_config
.
embedding_mode
def
build_explicit_enc_dec_prompt
(
encoder_prompt
:
SingletonPromptInputs
,
decoder_prompt
:
SingletonPromptInputs
,
)
->
ExplicitEncoderDecoderPrompt
:
return
ExplicitEncoderDecoderPrompt
(
encoder_prompt
=
encoder_prompt
,
decoder_prompt
=
decoder_prompt
)
def
zip_enc_dec_prompt_lists
(
enc_prompt_list
:
List
[
SingletonPromptInputs
],
dec_prompt_list
:
List
[
SingletonPromptInputs
],
)
->
List
[
ExplicitEncoderDecoderPrompt
]:
return
[
build_explicit_enc_dec_prompt
(
encoder_prompt
,
decoder_prompt
)
for
(
encoder_prompt
,
decoder_prompt
)
in
zip
(
enc_prompt_list
,
dec_prompt_list
)
]
def
to_enc_dec_tuple_list
(
enc_dec_prompts
:
List
[
ExplicitEncoderDecoderPrompt
],
)
->
List
[
Tuple
[
PromptInputs
,
PromptInputs
]]:
return
[(
enc_dec_prompt
[
'encoder_prompt'
],
enc_dec_prompt
[
'decoder_prompt'
])
for
enc_dec_prompt
in
enc_dec_prompts
]
vllm/worker/worker.py
View file @
7eb4a51c
...
...
@@ -19,8 +19,6 @@ from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
from
vllm.platforms
import
current_platform
from
vllm.prompt_adapter.request
import
PromptAdapterRequest
from
vllm.sequence
import
ExecuteModelRequest
from
vllm.utils
import
(
is_embedding_model_config
,
is_encoder_decoder_model_config
)
from
vllm.worker.cache_engine
import
CacheEngine
from
vllm.worker.embedding_model_runner
import
EmbeddingModelRunner
from
vllm.worker.enc_dec_model_runner
import
EncoderDecoderModelRunner
...
...
@@ -113,10 +111,10 @@ class Worker(LocalOrDistributedWorkerBase):
self
.
gpu_cache
:
Optional
[
List
[
List
[
torch
.
Tensor
]]]
=
None
def
_is_encoder_decoder_model
(
self
):
return
is_encoder_decoder_model
_config
(
self
.
model_config
)
return
self
.
model_config
.
is_encoder_decoder_model
def
_is_embedding_model
(
self
):
return
is_embedding_model
_config
(
self
.
model_config
)
return
self
.
model_config
.
is_embedding_model
def
init_device
(
self
)
->
None
:
if
self
.
device_config
.
device
.
type
==
"cuda"
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment