Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
fd389df9
Unverified
Commit
fd389df9
authored
Oct 17, 2025
by
StonyPort
Committed by
GitHub
Oct 16, 2025
Browse files
Reduce the image processing latency in VLM (#11541)
Co-authored-by:
qiuxuan.lzw
<
qiuxuan.lzw@alibaba-inc.com
>
parent
b0d1d717
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
3 deletions
+16
-3
python/sglang/srt/environ.py
python/sglang/srt/environ.py
+4
-0
python/sglang/srt/multimodal/processors/base_processor.py
python/sglang/srt/multimodal/processors/base_processor.py
+3
-1
python/sglang/srt/multimodal/processors/qwen_vl.py
python/sglang/srt/multimodal/processors/qwen_vl.py
+9
-2
No files found.
python/sglang/srt/environ.py
View file @
fd389df9
...
...
@@ -221,6 +221,10 @@ class Envs:
SGLANG_TRITON_PREFILL_TRUNCATION_ALIGN_SIZE
=
EnvInt
(
4096
)
SGLANG_TRITON_DECODE_SPLIT_TILE_SIZE
=
EnvInt
(
256
)
# VLM
SGLANG_IMAGE_MAX_PIXELS
=
EnvInt
(
16384
*
28
*
28
)
SGLANG_RESIZE_RESAMPLE
=
EnvStr
(
""
)
# fmt: on
...
...
python/sglang/srt/multimodal/processors/base_processor.py
View file @
fd389df9
...
...
@@ -313,7 +313,9 @@ class BaseMultimodalProcessor(ABC):
try
:
if
modality
==
Modality
.
IMAGE
:
img
,
_
=
load_image
(
data
)
return
img
.
convert
(
"RGB"
)
if
discard_alpha_channel
else
img
if
discard_alpha_channel
and
img
.
mode
!=
"RGB"
:
img
=
img
.
convert
(
"RGB"
)
return
img
elif
modality
==
Modality
.
VIDEO
:
return
load_video
(
data
,
frame_count_limit
)
elif
modality
==
Modality
.
AUDIO
:
...
...
python/sglang/srt/multimodal/processors/qwen_vl.py
View file @
fd389df9
...
...
@@ -9,6 +9,7 @@ import torchvision
from
PIL
import
Image
from
torchvision.transforms
import
InterpolationMode
from
sglang.srt.environ
import
envs
from
sglang.srt.layers.rotary_embedding
import
MRotaryEmbedding
from
sglang.srt.models.qwen2_5_vl
import
Qwen2_5_VLForConditionalGeneration
from
sglang.srt.models.qwen2_vl
import
Qwen2VLForConditionalGeneration
...
...
@@ -23,8 +24,14 @@ from sglang.utils import logger
IMAGE_FACTOR
=
28
MIN_PIXELS
=
4
*
28
*
28
MAX_PIXELS
=
16384
*
28
*
28
MAX_PIXELS
=
envs
.
SGLANG_IMAGE_MAX_PIXELS
.
get
()
MAX_RATIO
=
200
RESIZE_RESAMPLE
=
getattr
(
Image
,
envs
.
SGLANG_RESIZE_RESAMPLE
.
get
(),
None
)
if
envs
.
SGLANG_RESIZE_RESAMPLE
.
is_set
()
and
RESIZE_RESAMPLE
is
None
:
logger
.
warning
(
f
"Invalid RESIZE_RESAMPLE value: '
{
envs
.
SGLANG_RESIZE_RESAMPLE
.
get
()
}
'. "
f
"Ignoring and using default."
)
VIDEO_TOTAL_PIXELS
=
int
(
float
(
os
.
environ
.
get
(
"VIDEO_MAX_PIXELS"
,
128000
*
28
*
28
*
0.9
))
)
...
...
@@ -86,7 +93,7 @@ def resize_image(
min_pixels
=
min_pixels
,
max_pixels
=
max_pixels
,
)
image
=
image
.
resize
((
resized_width
,
resized_height
))
image
=
image
.
resize
((
resized_width
,
resized_height
)
,
resample
=
RESIZE_RESAMPLE
)
return
image
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment