Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
831453fc
Unverified
Commit
831453fc
authored
Jan 30, 2026
by
Cyrus Leung
Committed by
GitHub
Jan 29, 2026
Browse files
[Chore] Move `MediaConnector` to `vllm.multimodal.media` (#33324)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
5a66c9cc
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
381 additions
and
350 deletions
+381
-350
tests/multimodal/test_utils.py
tests/multimodal/test_utils.py
+2
-1
vllm/entrypoints/chat_utils.py
vllm/entrypoints/chat_utils.py
+1
-1
vllm/model_executor/models/nemotron_parse.py
vllm/model_executor/models/nemotron_parse.py
+1
-1
vllm/multimodal/inputs.py
vllm/multimodal/inputs.py
+3
-3
vllm/multimodal/media/__init__.py
vllm/multimodal/media/__init__.py
+5
-1
vllm/multimodal/media/connector.py
vllm/multimodal/media/connector.py
+343
-0
vllm/multimodal/utils.py
vllm/multimodal/utils.py
+24
-343
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+2
-0
No files found.
tests/multimodal/test_utils.py
View file @
831453fc
...
@@ -14,7 +14,8 @@ from PIL import Image, ImageChops
...
@@ -14,7 +14,8 @@ from PIL import Image, ImageChops
from
vllm.multimodal.image
import
convert_image_mode
from
vllm.multimodal.image
import
convert_image_mode
from
vllm.multimodal.inputs
import
PlaceholderRange
from
vllm.multimodal.inputs
import
PlaceholderRange
from
vllm.multimodal.utils
import
MediaConnector
,
argsort_mm_positions
from
vllm.multimodal.media
import
MediaConnector
from
vllm.multimodal.utils
import
argsort_mm_positions
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_ASSETS
=
[
TEST_IMAGE_ASSETS
=
[
...
...
vllm/entrypoints/chat_utils.py
View file @
831453fc
...
@@ -50,8 +50,8 @@ from vllm.multimodal.inputs import (
...
@@ -50,8 +50,8 @@ from vllm.multimodal.inputs import (
VisionChunkImage
,
VisionChunkImage
,
VisionChunkVideo
,
VisionChunkVideo
,
)
)
from
vllm.multimodal.media
import
MEDIA_CONNECTOR_REGISTRY
,
MediaConnector
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.utils
import
MEDIA_CONNECTOR_REGISTRY
,
MediaConnector
from
vllm.utils
import
random_uuid
from
vllm.utils
import
random_uuid
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.utils.import_utils
import
LazyLoader
from
vllm.utils.import_utils
import
LazyLoader
...
...
vllm/model_executor/models/nemotron_parse.py
View file @
831453fc
...
@@ -58,8 +58,8 @@ from vllm.multimodal.processing import (
...
@@ -58,8 +58,8 @@ from vllm.multimodal.processing import (
PromptReplacement
,
PromptReplacement
,
PromptUpdate
,
PromptUpdate
,
)
)
from
vllm.tokenizers
import
TokenizerLike
from
vllm.transformers_utils.configs.radio
import
RadioConfig
from
vllm.transformers_utils.configs.radio
import
RadioConfig
from
vllm.transformers_utils.tokenizer
import
TokenizerLike
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backend
import
AttentionType
from
vllm.v1.attention.backend
import
AttentionType
...
...
vllm/multimodal/inputs.py
View file @
831453fc
...
@@ -27,12 +27,12 @@ from vllm.utils.collection_utils import is_list_of
...
@@ -27,12 +27,12 @@ from vllm.utils.collection_utils import is_list_of
from
vllm.utils.import_utils
import
LazyLoader
from
vllm.utils.import_utils
import
LazyLoader
from
vllm.utils.jsontree
import
json_map_leaves
from
vllm.utils.jsontree
import
json_map_leaves
from
.media
import
MediaWithBytes
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
import
torch
import
torch
import
torch.types
import
torch.types
from
transformers.feature_extraction_utils
import
BatchFeature
from
transformers.feature_extraction_utils
import
BatchFeature
from
.media
import
MediaWithBytes
else
:
else
:
torch
=
LazyLoader
(
"torch"
,
globals
(),
"torch"
)
torch
=
LazyLoader
(
"torch"
,
globals
(),
"torch"
)
...
@@ -58,7 +58,7 @@ Represents a single audio
...
@@ -58,7 +58,7 @@ Represents a single audio
item, which can be passed to a HuggingFace `AudioProcessor`.
item, which can be passed to a HuggingFace `AudioProcessor`.
"""
"""
ImageItem
:
TypeAlias
=
Union
[
HfImageItem
,
"torch.Tensor"
,
"
MediaWithBytes[HfImageItem]
"
]
ImageItem
:
TypeAlias
=
Union
[
HfImageItem
,
"torch.Tensor"
,
MediaWithBytes
[
HfImageItem
]]
"""
"""
A `transformers.image_utils.ImageInput` representing a single image
A `transformers.image_utils.ImageInput` representing a single image
item, which can be passed to a HuggingFace `ImageProcessor`.
item, which can be passed to a HuggingFace `ImageProcessor`.
...
...
vllm/multimodal/media/__init__.py
View file @
831453fc
...
@@ -2,8 +2,9 @@
...
@@ -2,8 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
.audio
import
AudioEmbeddingMediaIO
,
AudioMediaIO
from
.audio
import
AudioEmbeddingMediaIO
,
AudioMediaIO
from
.base
import
MediaIO
,
MediaWithBytes
from
.base
import
MediaIO
,
MediaWithBytes
from
.connector
import
MEDIA_CONNECTOR_REGISTRY
,
MediaConnector
from
.image
import
ImageEmbeddingMediaIO
,
ImageMediaIO
from
.image
import
ImageEmbeddingMediaIO
,
ImageMediaIO
from
.video
import
VideoMediaIO
from
.video
import
VIDEO_LOADER_REGISTRY
,
VideoMediaIO
__all__
=
[
__all__
=
[
"MediaIO"
,
"MediaIO"
,
...
@@ -12,5 +13,8 @@ __all__ = [
...
@@ -12,5 +13,8 @@ __all__ = [
"AudioMediaIO"
,
"AudioMediaIO"
,
"ImageEmbeddingMediaIO"
,
"ImageEmbeddingMediaIO"
,
"ImageMediaIO"
,
"ImageMediaIO"
,
"VIDEO_LOADER_REGISTRY"
,
"VideoMediaIO"
,
"VideoMediaIO"
,
"MEDIA_CONNECTOR_REGISTRY"
,
"MediaConnector"
,
]
]
vllm/multimodal/media/connector.py
0 → 100644
View file @
831453fc
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
atexit
from
concurrent.futures
import
ThreadPoolExecutor
from
pathlib
import
Path
from
typing
import
Any
,
TypeVar
from
urllib.request
import
url2pathname
import
numpy
as
np
import
numpy.typing
as
npt
import
torch
from
PIL
import
Image
,
UnidentifiedImageError
from
urllib3.util
import
Url
,
parse_url
import
vllm.envs
as
envs
from
vllm.connections
import
HTTPConnection
,
global_http_connection
from
vllm.utils.registry
import
ExtensionManager
from
.audio
import
AudioEmbeddingMediaIO
,
AudioMediaIO
from
.base
import
MediaIO
from
.image
import
ImageEmbeddingMediaIO
,
ImageMediaIO
from
.video
import
VideoMediaIO
# Type variable used below to tie a `MediaIO[_M]` loader to the value
# returned by the `MediaConnector` loading methods.
_M = TypeVar("_M")

# Module-wide worker pool. `MediaConnector.load_from_url_async` submits the
# media loading callables to it through `loop.run_in_executor`. The pool size
# is read from `VLLM_MEDIA_LOADING_THREAD_COUNT`.
global_thread_pool = ThreadPoolExecutor(
    max_workers=envs.VLLM_MEDIA_LOADING_THREAD_COUNT
)
# Shut the pool down when the interpreter exits.
atexit.register(global_thread_pool.shutdown)

# Registry of media connector implementations; `MediaConnector` below
# registers itself under the name "http".
MEDIA_CONNECTOR_REGISTRY = ExtensionManager()
@MEDIA_CONNECTOR_REGISTRY.register("http")
class MediaConnector:
    """Load multi-modal media (audio, image, video, embeddings) from URLs.

    Three URL schemes are handled by `load_from_url` and
    `load_from_url_async`:

    - HTTP(S) URLs, downloaded through `connection` and optionally restricted
      to `allowed_media_domains`.
    - `data:` URLs carrying a base64 payload.
    - `file:` URLs, only when `allowed_local_media_path` is configured and
      the file lives underneath that directory.
    """

    def __init__(
        self,
        media_io_kwargs: dict[str, dict[str, Any]] | None = None,
        connection: HTTPConnection = global_http_connection,
        *,
        allowed_local_media_path: str = "",
        allowed_media_domains: list[str] | None = None,
    ) -> None:
        """
        Args:
            media_io_kwargs: Additional args passed to process media
                             inputs, keyed by modalities. For example,
                             to set num_frames for video, set
                             `--media-io-kwargs '{"video":{"num_frames":40}}'`
            connection: HTTP connection client to download media contents.
            allowed_local_media_path: A local directory to load media files from.
            allowed_media_domains: If set, only media URLs that belong to this
                                   domain can be used for multi-modal inputs.

        Raises:
            ValueError: If `allowed_local_media_path` is given but does not
                exist or is not a directory.
        """
        super().__init__()

        self.media_io_kwargs: dict[str, dict[str, Any]] = (
            media_io_kwargs if media_io_kwargs else {}
        )
        self.connection = connection

        if allowed_local_media_path:
            allowed_local_media_path_ = Path(allowed_local_media_path)

            if not allowed_local_media_path_.exists():
                raise ValueError(
                    "Invalid `--allowed-local-media-path`: The path "
                    f"{allowed_local_media_path_} does not exist."
                )
            if not allowed_local_media_path_.is_dir():
                raise ValueError(
                    "Invalid `--allowed-local-media-path`: The path "
                    f"{allowed_local_media_path_} must be a directory."
                )
        else:
            # `None` means that loading from `file:` URLs is disabled.
            allowed_local_media_path_ = None

        self.allowed_local_media_path = allowed_local_media_path_
        if allowed_media_domains is None:
            allowed_media_domains = []
        self.allowed_media_domains = allowed_media_domains

    def _load_data_url(
        self,
        url_spec: Url,
        media_io: MediaIO[_M],
    ) -> _M:  # type: ignore[type-var]
        """Decode a `data:<media type>[;<param>...];base64,<payload>` URL.

        Args:
            url_spec: The parsed data URL.
            media_io: Loader whose `load_base64` decodes the payload.

        Raises:
            ValueError: If the URL has no `,` separating the header from the
                payload, or no `;<encoding>` marker in the header.
            NotImplementedError: If the encoding marker is not `base64`.
        """
        url_spec_path = url_spec.path or ""

        # Only the first "," separates the header from the payload.
        data_spec, comma, data = url_spec_path.partition(",")
        if not comma:
            raise ValueError(
                "Invalid data URL: expected the form "
                "`data:<media type>;base64,<data>` but found no ',' "
                "separating the header from the data."
            )

        # The header is "<media type>[;<param>...];<encoding>". Media type
        # parameters (e.g. "charset=utf-8") may precede the encoding marker
        # and are not needed to decode the payload.
        media_type, *params = data_spec.split(";")
        if not params:
            raise ValueError(
                "Invalid data URL: expected the form "
                "`data:<media type>;base64,<data>` but found no "
                "';<encoding>' marker in the header."
            )

        # media_type starts with a leading "/" (e.g., "/video/jpeg")
        media_type = media_type.lstrip("/")

        if params[-1] != "base64":
            msg = "Only base64 data URLs are supported for now."
            raise NotImplementedError(msg)

        return media_io.load_base64(media_type, data)

    def _load_file_url(
        self,
        url_spec: Url,
        media_io: MediaIO[_M],
    ) -> _M:  # type: ignore[type-var]
        """Load a local file referenced by a `file:` URL.

        Args:
            url_spec: The parsed file URL.
            media_io: Loader whose `load_file` reads the file.

        Raises:
            RuntimeError: If no `allowed_local_media_path` was configured.
            ValueError: If the resolved file path is not located underneath
                the allowed directory.
        """
        allowed_local_media_path = self.allowed_local_media_path
        if allowed_local_media_path is None:
            raise RuntimeError(
                "Cannot load local files without `--allowed-local-media-path`."
            )

        url_spec_path = url_spec.path or ""
        url_spec_netloc = url_spec.netloc or ""
        filepath = Path(url2pathname(url_spec_netloc + url_spec_path))

        # Resolve BOTH sides before comparing. `Path.resolve()` yields an
        # absolute, symlink-free path, so a relative or symlinked allowed
        # directory would otherwise never appear among the file's parents
        # and every file would be rejected.
        if allowed_local_media_path.resolve() not in filepath.resolve().parents:
            raise ValueError(
                f"The file path {filepath} must be a subpath "
                f"of `--allowed-local-media-path {allowed_local_media_path}`."
            )

        return media_io.load_file(filepath)

    def _assert_url_in_allowed_media_domains(self, url_spec: Url) -> None:
        """Reject URLs whose hostname is not in `allowed_media_domains`.

        An empty allow-list disables the check.

        Raises:
            ValueError: If an allow-list is configured and the URL's hostname
                is not one of its entries.
        """
        if (
            self.allowed_media_domains
            and url_spec.hostname not in self.allowed_media_domains
        ):
            raise ValueError(
                f"The URL must be from one of the allowed domains: "
                f"{self.allowed_media_domains}. Input URL domain: "
                f"{url_spec.hostname}"
            )

    def load_from_url(
        self,
        url: str,
        media_io: MediaIO[_M],
        *,
        fetch_timeout: int | None = None,
    ) -> _M:  # type: ignore[type-var]
        """Load media from an HTTP(S), data or file URL.

        Args:
            url: The URL to load from.
            media_io: Loader that turns the fetched content into media.
            fetch_timeout: Timeout passed to the HTTP client; only used for
                HTTP(S) URLs.

        Raises:
            ValueError: If the URL scheme is not supported, or the URL is
                rejected by the domain / local path restrictions.
        """
        url_spec = parse_url(url)

        if url_spec.scheme and url_spec.scheme.startswith("http"):
            self._assert_url_in_allowed_media_domains(url_spec)

            connection = self.connection
            data = connection.get_bytes(
                url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )

            return media_io.load_bytes(data)

        if url_spec.scheme == "data":
            return self._load_data_url(url_spec, media_io)

        if url_spec.scheme == "file":
            return self._load_file_url(url_spec, media_io)

        msg = "The URL must be either a HTTP, data or file URL."
        raise ValueError(msg)

    async def load_from_url_async(
        self,
        url: str,
        media_io: MediaIO[_M],
        *,
        fetch_timeout: int | None = None,
    ) -> _M:
        """Asynchronous counterpart of `load_from_url`.

        The HTTP download is awaited on the running event loop, while the
        `media_io` loading step (for every scheme) is submitted to
        `global_thread_pool` via `run_in_executor`.

        Args:
            url: The URL to load from.
            media_io: Loader that turns the fetched content into media.
            fetch_timeout: Timeout passed to the HTTP client; only used for
                HTTP(S) URLs.

        Raises:
            ValueError: If the URL scheme is not supported, or the URL is
                rejected by the domain / local path restrictions.
        """
        url_spec = parse_url(url)
        loop = asyncio.get_running_loop()

        if url_spec.scheme and url_spec.scheme.startswith("http"):
            self._assert_url_in_allowed_media_domains(url_spec)

            connection = self.connection
            data = await connection.async_get_bytes(
                url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )
            future = loop.run_in_executor(
                global_thread_pool, media_io.load_bytes, data
            )
            return await future

        if url_spec.scheme == "data":
            future = loop.run_in_executor(
                global_thread_pool, self._load_data_url, url_spec, media_io
            )
            return await future

        if url_spec.scheme == "file":
            future = loop.run_in_executor(
                global_thread_pool, self._load_file_url, url_spec, media_io
            )
            return await future

        msg = "The URL must be either a HTTP, data or file URL."
        raise ValueError(msg)

    def fetch_audio(
        self,
        audio_url: str,
    ) -> tuple[np.ndarray, int | float]:
        """
        Load audio from a URL.

        The loader is built from the `"audio"` entry of `media_io_kwargs` and
        the download uses `VLLM_AUDIO_FETCH_TIMEOUT`.
        """
        audio_io = AudioMediaIO(**self.media_io_kwargs.get("audio", {}))

        return self.load_from_url(
            audio_url,
            audio_io,
            fetch_timeout=envs.VLLM_AUDIO_FETCH_TIMEOUT,
        )

    async def fetch_audio_async(
        self,
        audio_url: str,
    ) -> tuple[np.ndarray, int | float]:
        """
        Asynchronously fetch audio from a URL.

        The loader is built from the `"audio"` entry of `media_io_kwargs` and
        the download uses `VLLM_AUDIO_FETCH_TIMEOUT`.
        """
        audio_io = AudioMediaIO(**self.media_io_kwargs.get("audio", {}))

        return await self.load_from_url_async(
            audio_url,
            audio_io,
            fetch_timeout=envs.VLLM_AUDIO_FETCH_TIMEOUT,
        )

    def fetch_image(
        self,
        image_url: str,
        *,
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
        Load a PIL image from an HTTP, base64 data or file URL.

        By default, the image is converted into RGB format.

        Raises:
            ValueError: If the content cannot be identified as an image, or
                the URL is rejected by `load_from_url`.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )

        try:
            return self.load_from_url(
                image_url,
                image_io,
                fetch_timeout=envs.VLLM_IMAGE_FETCH_TIMEOUT,
            )
        except UnidentifiedImageError as e:
            # convert to ValueError to be properly caught upstream
            raise ValueError(str(e)) from e

    async def fetch_image_async(
        self,
        image_url: str,
        *,
        image_mode: str = "RGB",
    ) -> Image.Image:
        """
        Asynchronously load a PIL image from an HTTP, base64 data or file URL.

        By default, the image is converted into RGB format.

        Raises:
            ValueError: If the content cannot be identified as an image, or
                the URL is rejected by `load_from_url_async`.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )

        try:
            return await self.load_from_url_async(
                image_url,
                image_io,
                fetch_timeout=envs.VLLM_IMAGE_FETCH_TIMEOUT,
            )
        except UnidentifiedImageError as e:
            # convert to ValueError to be properly caught upstream
            raise ValueError(str(e)) from e

    def fetch_video(
        self,
        video_url: str,
        *,
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Load video from an HTTP, base64 data or file URL.

        `image_mode` (RGB by default) is forwarded to the `ImageMediaIO`
        that is handed to the video loader.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        video_io = VideoMediaIO(image_io, **self.media_io_kwargs.get("video", {}))

        return self.load_from_url(
            video_url,
            video_io,
            fetch_timeout=envs.VLLM_VIDEO_FETCH_TIMEOUT,
        )

    async def fetch_video_async(
        self,
        video_url: str,
        *,
        image_mode: str = "RGB",
    ) -> tuple[npt.NDArray, dict[str, Any]]:
        """
        Asynchronously load video from an HTTP, base64 data or file URL.

        `image_mode` (RGB by default) is forwarded to the `ImageMediaIO`
        that is handed to the video loader.
        """
        image_io = ImageMediaIO(
            image_mode=image_mode, **self.media_io_kwargs.get("image", {})
        )
        video_io = VideoMediaIO(image_io, **self.media_io_kwargs.get("video", {}))

        return await self.load_from_url_async(
            video_url,
            video_io,
            fetch_timeout=envs.VLLM_VIDEO_FETCH_TIMEOUT,
        )

    def fetch_image_embedding(
        self,
        data: str,
    ) -> torch.Tensor:
        """
        Load an image embedding from base64-encoded data.

        `data` is passed straight to `ImageEmbeddingMediaIO.load_base64`
        (with an empty media type); no URL is parsed or fetched.
        """
        image_embedding_io = ImageEmbeddingMediaIO()

        return image_embedding_io.load_base64("", data)

    def fetch_audio_embedding(
        self,
        data: str,
    ) -> torch.Tensor:
        """
        Load an audio embedding from base64-encoded data.

        `data` is passed straight to `AudioEmbeddingMediaIO.load_base64`
        (with an empty media type); no URL is parsed or fetched.
        """
        audio_embedding_io = AudioEmbeddingMediaIO()

        return audio_embedding_io.load_base64("", data)
vllm/multimodal/utils.py
View file @
831453fc
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
atexit
import
mimetypes
import
mimetypes
import
warnings
from
collections.abc
import
Generator
from
collections.abc
import
Generator
from
concurrent.futures
import
ThreadPoolExecutor
from
itertools
import
groupby
from
itertools
import
groupby
from
pathlib
import
Path
from
typing
import
TYPE_CHECKING
,
Any
from
typing
import
TYPE_CHECKING
,
Any
,
TypeVar
from
urllib.request
import
url2pathname
import
numpy
as
np
import
numpy
as
np
import
numpy.typing
as
npt
import
numpy.typing
as
npt
import
torch
from
PIL
import
Image
from
PIL
import
Image
,
UnidentifiedImageError
from
urllib3.util
import
Url
,
parse_url
import
vllm.envs
as
envs
from
vllm.connections
import
HTTPConnection
,
global_http_connection
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils.registry
import
ExtensionManager
from
vllm.utils.import_utils
import
LazyLoader
from
.media
import
(
AudioEmbeddingMediaIO
,
AudioMediaIO
,
ImageEmbeddingMediaIO
,
ImageMediaIO
,
MediaIO
,
VideoMediaIO
,
)
if
TYPE_CHECKING
:
from
.inputs
import
(
from
.inputs
import
(
BatchedTensorInputs
,
BatchedTensorInputs
,
MultiModalKwargsItem
,
MultiModalKwargsItem
,
MultiModalKwargsItems
,
MultiModalPlaceholderDict
,
MultiModalPlaceholderDict
,
)
else
:
BatchedTensorInputs
=
Any
MultiModalKwargsItem
=
Any
MultiModalPlaceholderDict
=
Any
logger
=
init_logger
(
__name__
)
global_thread_pool
=
ThreadPoolExecutor
(
max_workers
=
envs
.
VLLM_MEDIA_LOADING_THREAD_COUNT
)
)
atexit
.
register
(
global_thread_pool
.
shutdown
)
from
.media
import
AudioMediaIO
,
ImageMediaIO
,
MediaConnector
,
VideoMediaIO
_M
=
TypeVar
(
"_M"
)
MEDIA_CONNECTOR_REGISTRY
=
ExtensionManager
()
@
MEDIA_CONNECTOR_REGISTRY
.
register
(
"http"
)
class
MediaConnector
:
def
__init__
(
self
,
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
|
None
=
None
,
connection
:
HTTPConnection
=
global_http_connection
,
*
,
allowed_local_media_path
:
str
=
""
,
allowed_media_domains
:
list
[
str
]
|
None
=
None
,
)
->
None
:
"""
Args:
media_io_kwargs: Additional args passed to process media
inputs, keyed by modalities. For example,
to set num_frames for video, set
`--media-io-kwargs '{"video":{"num_frames":40}}'`
connection: HTTP connection client to download media contents.
allowed_local_media_path: A local directory to load media files from.
allowed_media_domains: If set, only media URLs that belong to this
domain can be used for multi-modal inputs.
"""
super
().
__init__
()
self
.
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
=
(
media_io_kwargs
if
media_io_kwargs
else
{}
)
self
.
connection
=
connection
if
allowed_local_media_path
:
allowed_local_media_path_
=
Path
(
allowed_local_media_path
)
if
not
allowed_local_media_path_
.
exists
():
raise
ValueError
(
"Invalid `--allowed-local-media-path`: The path "
f
"
{
allowed_local_media_path_
}
does not exist."
)
if
not
allowed_local_media_path_
.
is_dir
():
raise
ValueError
(
"Invalid `--allowed-local-media-path`: The path "
f
"
{
allowed_local_media_path_
}
must be a directory."
)
else
:
allowed_local_media_path_
=
None
self
.
allowed_local_media_path
=
allowed_local_media_path_
if
allowed_media_domains
is
None
:
allowed_media_domains
=
[]
self
.
allowed_media_domains
=
allowed_media_domains
def
_load_data_url
(
self
,
url_spec
:
Url
,
media_io
:
MediaIO
[
_M
],
)
->
_M
:
# type: ignore[type-var]
url_spec_path
=
url_spec
.
path
or
""
data_spec
,
data
=
url_spec_path
.
split
(
","
,
1
)
media_type
,
data_type
=
data_spec
.
split
(
";"
,
1
)
# media_type starts with a leading "/" (e.g., "/video/jpeg")
media_type
=
media_type
.
lstrip
(
"/"
)
if
data_type
!=
"base64"
:
msg
=
"Only base64 data URLs are supported for now."
raise
NotImplementedError
(
msg
)
return
media_io
.
load_base64
(
media_type
,
data
)
def
_load_file_url
(
self
,
url_spec
:
Url
,
media_io
:
MediaIO
[
_M
],
)
->
_M
:
# type: ignore[type-var]
allowed_local_media_path
=
self
.
allowed_local_media_path
if
allowed_local_media_path
is
None
:
raise
RuntimeError
(
"Cannot load local files without `--allowed-local-media-path`."
)
url_spec_path
=
url_spec
.
path
or
""
url_spec_netloc
=
url_spec
.
netloc
or
""
filepath
=
Path
(
url2pathname
(
url_spec_netloc
+
url_spec_path
))
if
allowed_local_media_path
not
in
filepath
.
resolve
().
parents
:
raise
ValueError
(
f
"The file path
{
filepath
}
must be a subpath "
f
"of `--allowed-local-media-path
{
allowed_local_media_path
}
`."
)
return
media_io
.
load_file
(
filepath
)
def
_assert_url_in_allowed_media_domains
(
self
,
url_spec
:
Url
)
->
None
:
if
(
self
.
allowed_media_domains
and
url_spec
.
hostname
not
in
self
.
allowed_media_domains
):
raise
ValueError
(
f
"The URL must be from one of the allowed domains: "
f
"
{
self
.
allowed_media_domains
}
. Input URL domain: "
f
"
{
url_spec
.
hostname
}
"
)
def
load_from_url
(
self
,
url
:
str
,
media_io
:
MediaIO
[
_M
],
*
,
fetch_timeout
:
int
|
None
=
None
,
)
->
_M
:
# type: ignore[type-var]
url_spec
=
parse_url
(
url
)
if
url_spec
.
scheme
and
url_spec
.
scheme
.
startswith
(
"http"
):
self
.
_assert_url_in_allowed_media_domains
(
url_spec
)
connection
=
self
.
connection
data
=
connection
.
get_bytes
(
url
,
timeout
=
fetch_timeout
,
allow_redirects
=
envs
.
VLLM_MEDIA_URL_ALLOW_REDIRECTS
,
)
return
media_io
.
load_bytes
(
data
)
if
url_spec
.
scheme
==
"data"
:
return
self
.
_load_data_url
(
url_spec
,
media_io
)
if
url_spec
.
scheme
==
"file"
:
return
self
.
_load_file_url
(
url_spec
,
media_io
)
msg
=
"The URL must be either a HTTP, data or file URL."
raise
ValueError
(
msg
)
async
def
load_from_url_async
(
self
,
url
:
str
,
media_io
:
MediaIO
[
_M
],
*
,
fetch_timeout
:
int
|
None
=
None
,
)
->
_M
:
url_spec
=
parse_url
(
url
)
loop
=
asyncio
.
get_running_loop
()
if
url_spec
.
scheme
and
url_spec
.
scheme
.
startswith
(
"http"
):
self
.
_assert_url_in_allowed_media_domains
(
url_spec
)
connection
=
self
.
connection
data
=
await
connection
.
async_get_bytes
(
url
,
timeout
=
fetch_timeout
,
allow_redirects
=
envs
.
VLLM_MEDIA_URL_ALLOW_REDIRECTS
,
)
future
=
loop
.
run_in_executor
(
global_thread_pool
,
media_io
.
load_bytes
,
data
)
return
await
future
if
url_spec
.
scheme
==
"data"
:
future
=
loop
.
run_in_executor
(
global_thread_pool
,
self
.
_load_data_url
,
url_spec
,
media_io
)
return
await
future
if
url_spec
.
scheme
==
"file"
:
future
=
loop
.
run_in_executor
(
global_thread_pool
,
self
.
_load_file_url
,
url_spec
,
media_io
)
return
await
future
msg
=
"The URL must be either a HTTP, data or file URL."
raise
ValueError
(
msg
)
def
fetch_audio
(
self
,
audio_url
:
str
,
)
->
tuple
[
np
.
ndarray
,
int
|
float
]:
"""
Load audio from a URL.
"""
audio_io
=
AudioMediaIO
(
**
self
.
media_io_kwargs
.
get
(
"audio"
,
{}))
return
self
.
load_from_url
(
audio_url
,
audio_io
,
fetch_timeout
=
envs
.
VLLM_AUDIO_FETCH_TIMEOUT
,
)
async
def
fetch_audio_async
(
self
,
audio_url
:
str
,
)
->
tuple
[
np
.
ndarray
,
int
|
float
]:
"""
Asynchronously fetch audio from a URL.
"""
audio_io
=
AudioMediaIO
(
**
self
.
media_io_kwargs
.
get
(
"audio"
,
{}))
return
await
self
.
load_from_url_async
(
audio_url
,
audio_io
,
fetch_timeout
=
envs
.
VLLM_AUDIO_FETCH_TIMEOUT
,
)
def
fetch_image
(
self
,
image_url
:
str
,
*
,
image_mode
:
str
=
"RGB"
,
)
->
Image
.
Image
:
"""
Load a PIL image from an HTTP or base64 data URL.
By default, the image is converted into RGB format.
"""
image_io
=
ImageMediaIO
(
image_mode
=
image_mode
,
**
self
.
media_io_kwargs
.
get
(
"image"
,
{})
)
try
:
return
self
.
load_from_url
(
image_url
,
image_io
,
fetch_timeout
=
envs
.
VLLM_IMAGE_FETCH_TIMEOUT
,
)
except
UnidentifiedImageError
as
e
:
# convert to ValueError to be properly caught upstream
raise
ValueError
(
str
(
e
))
from
e
async
def
fetch_image_async
(
self
,
image_url
:
str
,
*
,
image_mode
:
str
=
"RGB"
,
)
->
Image
.
Image
:
"""
Asynchronously load a PIL image from an HTTP or base64 data URL.
By default, the image is converted into RGB format.
"""
image_io
=
ImageMediaIO
(
image_mode
=
image_mode
,
**
self
.
media_io_kwargs
.
get
(
"image"
,
{})
)
try
:
return
await
self
.
load_from_url_async
(
image_url
,
image_io
,
fetch_timeout
=
envs
.
VLLM_IMAGE_FETCH_TIMEOUT
,
)
except
UnidentifiedImageError
as
e
:
# convert to ValueError to be properly caught upstream
raise
ValueError
(
str
(
e
))
from
e
def
fetch_video
(
if
TYPE_CHECKING
:
self
,
import
torch.types
video_url
:
str
,
else
:
*
,
torch
=
LazyLoader
(
"torch"
,
globals
(),
"torch"
)
image_mode
:
str
=
"RGB"
,
)
->
tuple
[
npt
.
NDArray
,
dict
[
str
,
Any
]]:
"""
Load video from an HTTP or base64 data URL.
"""
image_io
=
ImageMediaIO
(
image_mode
=
image_mode
,
**
self
.
media_io_kwargs
.
get
(
"image"
,
{})
)
video_io
=
VideoMediaIO
(
image_io
,
**
self
.
media_io_kwargs
.
get
(
"video"
,
{}))
return
self
.
load_from_url
(
logger
=
init_logger
(
__name__
)
video_url
,
video_io
,
fetch_timeout
=
envs
.
VLLM_VIDEO_FETCH_TIMEOUT
,
)
async
def
fetch_video_async
(
self
,
video_url
:
str
,
*
,
image_mode
:
str
=
"RGB"
,
)
->
tuple
[
npt
.
NDArray
,
dict
[
str
,
Any
]]:
"""
Asynchronously load video from an HTTP or base64 data URL.
By default, the image is converted into RGB format.
def
__getattr__
(
name
:
str
):
"""
if
name
==
"MEDIA_CONNECTOR_REGISTRY"
:
image_io
=
ImageMediaIO
(
from
.media
import
MEDIA_CONNECTOR_REGISTRY
image_mode
=
image_mode
,
**
self
.
media_io_kwargs
.
get
(
"image"
,
{})
)
video_io
=
VideoMediaIO
(
image_io
,
**
self
.
media_io_kwargs
.
get
(
"video"
,
{}))
return
await
self
.
load_from_url_async
(
warnings
.
warn
(
video_url
,
"`vllm.multimodal.utils.MEDIA_CONNECTOR_REGISTRY` "
video_io
,
"has been moved to `vllm.multimodal.media.MEDIA_CONNECTOR_REGISTRY`. "
fetch_timeout
=
envs
.
VLLM_VIDEO_FETCH_TIMEOUT
,
"The old name will be removed in v0.17."
,
DeprecationWarning
,
stacklevel
=
2
,
)
)
def
fetch_image_embedding
(
return
MEDIA_CONNECTOR_REGISTRY
self
,
data
:
str
,
)
->
torch
.
Tensor
:
"""
Load image embedding from a URL.
"""
image_embedding_io
=
ImageEmbeddingMediaIO
()
return
image_embedding_io
.
load_base64
(
""
,
data
)
def
fetch_audio_embedding
(
self
,
data
:
str
,
)
->
torch
.
Tensor
:
"""
Load audio embedding from a URL.
"""
audio_embedding_io
=
AudioEmbeddingMediaIO
()
r
eturn
audio_embedding_io
.
load_base64
(
""
,
data
)
r
aise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
def
encode_audio_base64
(
def
encode_audio_base64
(
...
@@ -483,8 +166,6 @@ def group_mm_kwargs_by_modality(
...
@@ -483,8 +166,6 @@ def group_mm_kwargs_by_modality(
Yields:
Yields:
A tuple `(modality, num_items, grouped_kwargs)`.
A tuple `(modality, num_items, grouped_kwargs)`.
"""
"""
from
vllm.multimodal.inputs
import
MultiModalKwargsItems
for
modality
,
group
in
groupby
(
mm_kwargs
,
key
=
lambda
x
:
x
[
0
]):
for
modality
,
group
in
groupby
(
mm_kwargs
,
key
=
lambda
x
:
x
[
0
]):
items_lst
=
[
item
for
_
,
item
in
group
]
items_lst
=
[
item
for
_
,
item
in
group
]
mm_kwargs_items
=
MultiModalKwargsItems
({
modality
:
items_lst
})
mm_kwargs_items
=
MultiModalKwargsItems
({
modality
:
items_lst
})
...
...
vllm/transformers_utils/tokenizer.py
View file @
831453fc
...
@@ -17,3 +17,5 @@ def __getattr__(name: str):
...
@@ -17,3 +17,5 @@ def __getattr__(name: str):
)
)
return
get_tokenizer
return
get_tokenizer
raise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment