Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
497efe74
"git@developer.sourcefind.cn:change/sglang.git" did not exist on "ddeb9d42dec70ba032929f1a48fc64381fdda2b2"
Unverified
Commit
497efe74
authored
Jul 16, 2025
by
Mick
Committed by
GitHub
Jul 15, 2025
Browse files
Revert "feat: replace Decord with video_reader-rs" (#8077)
parent
69f453e5
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
21 additions
and
16 deletions
+21
-16
python/pyproject.toml
python/pyproject.toml
+0
-1
python/sglang/check_env.py
python/sglang/check_env.py
+1
-1
python/sglang/srt/multimodal/processors/base_processor.py
python/sglang/srt/multimodal/processors/base_processor.py
+2
-2
python/sglang/srt/multimodal/processors/internvl.py
python/sglang/srt/multimodal/processors/internvl.py
+2
-2
python/sglang/srt/multimodal/processors/qwen_vl.py
python/sglang/srt/multimodal/processors/qwen_vl.py
+2
-2
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+14
-8
No files found.
python/pyproject.toml
View file @
497efe74
...
@@ -21,7 +21,6 @@ runtime_common = [
...
@@ -21,7 +21,6 @@ runtime_common = [
"build"
,
"build"
,
"compressed-tensors"
,
"compressed-tensors"
,
"datasets"
,
"datasets"
,
"video-reader-rs"
,
"fastapi"
,
"fastapi"
,
"hf_transfer"
,
"hf_transfer"
,
"huggingface_hub"
,
"huggingface_hub"
,
...
...
python/sglang/check_env.py
View file @
497efe74
...
@@ -47,7 +47,7 @@ PACKAGE_LIST = [
...
@@ -47,7 +47,7 @@ PACKAGE_LIST = [
"tiktoken"
,
"tiktoken"
,
"anthropic"
,
"anthropic"
,
"litellm"
,
"litellm"
,
"
video-reader-rs
"
,
"
decord
"
,
]
]
...
...
python/sglang/srt/multimodal/processors/base_processor.py
View file @
497efe74
...
@@ -206,7 +206,7 @@ class BaseMultimodalProcessor(ABC):
...
@@ -206,7 +206,7 @@ class BaseMultimodalProcessor(ABC):
estimate the total frame count from all visual input
estimate the total frame count from all visual input
"""
"""
# Lazy import because decord is not available on some arm platforms.
# Lazy import because decord is not available on some arm platforms.
from
video_reader
import
Py
VideoReader
,
cpu
from
decord
import
VideoReader
,
cpu
# Before processing inputs
# Before processing inputs
if
not
image_data
or
len
(
image_data
)
==
0
:
if
not
image_data
or
len
(
image_data
)
==
0
:
...
@@ -216,7 +216,7 @@ class BaseMultimodalProcessor(ABC):
...
@@ -216,7 +216,7 @@ class BaseMultimodalProcessor(ABC):
if
isinstance
(
image
,
str
)
and
image
.
startswith
(
"video:"
):
if
isinstance
(
image
,
str
)
and
image
.
startswith
(
"video:"
):
path
=
image
[
len
(
"video:"
)
:]
path
=
image
[
len
(
"video:"
)
:]
# Estimate frames for the video
# Estimate frames for the video
vr
=
Py
VideoReader
(
path
,
threads
=
0
)
vr
=
VideoReader
(
path
,
ctx
=
cpu
(
0
)
)
num_frames
=
len
(
vr
)
num_frames
=
len
(
vr
)
else
:
else
:
# For images, each contributes one frame
# For images, each contributes one frame
...
...
python/sglang/srt/multimodal/processors/internvl.py
View file @
497efe74
...
@@ -150,7 +150,7 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
...
@@ -150,7 +150,7 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
def
load_video
(
video_path
,
bound
=
None
,
input_size
=
448
,
max_num
=
1
,
num_segments
=
32
):
def
load_video
(
video_path
,
bound
=
None
,
input_size
=
448
,
max_num
=
1
,
num_segments
=
32
):
vr
=
VideoReader
(
video_path
,
ctx
=
cpu
(
0
),
num_threads
=
1
)
vr
=
VideoReader
(
video_path
,
ctx
=
cpu
(
0
),
num_threads
=
1
)
max_frame
=
len
(
vr
)
-
1
max_frame
=
len
(
vr
)
-
1
fps
=
float
(
vr
.
get_fps
())
fps
=
float
(
vr
.
get_
avg_
fps
())
pixel_values_list
,
num_patches_list
=
[],
[]
pixel_values_list
,
num_patches_list
=
[],
[]
transform
=
InternVLImageProcessor
.
build_transform
(
input_size
=
input_size
)
transform
=
InternVLImageProcessor
.
build_transform
(
input_size
=
input_size
)
...
@@ -158,7 +158,7 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
...
@@ -158,7 +158,7 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
bound
,
fps
,
max_frame
,
first_idx
=
0
,
num_segments
=
num_segments
bound
,
fps
,
max_frame
,
first_idx
=
0
,
num_segments
=
num_segments
)
)
for
frame_index
in
frame_indices
:
for
frame_index
in
frame_indices
:
img
=
Image
.
fromarray
(
vr
[
frame_index
]).
convert
(
"RGB"
)
img
=
Image
.
fromarray
(
vr
[
frame_index
]
.
asnumpy
()
).
convert
(
"RGB"
)
img
=
InternVLImageProcessor
.
dynamic_preprocess
(
img
=
InternVLImageProcessor
.
dynamic_preprocess
(
img
,
image_size
=
input_size
,
use_thumbnail
=
True
,
max_num
=
max_num
img
,
image_size
=
input_size
,
use_thumbnail
=
True
,
max_num
=
max_num
)
)
...
...
python/sglang/srt/multimodal/processors/qwen_vl.py
View file @
497efe74
...
@@ -156,10 +156,10 @@ async def preprocess_video(
...
@@ -156,10 +156,10 @@ async def preprocess_video(
# vr: VideoReader, image_factor: int = IMAGE_FACTOR
# vr: VideoReader, image_factor: int = IMAGE_FACTOR
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
ele
=
{}
ele
=
{}
total_frames
,
video_fps
=
len
(
vr
),
vr
.
get_fps
()
total_frames
,
video_fps
=
len
(
vr
),
vr
.
get_
avg_
fps
()
nframes
=
smart_nframes
({},
total_frames
=
total_frames
,
video_fps
=
video_fps
)
nframes
=
smart_nframes
({},
total_frames
=
total_frames
,
video_fps
=
video_fps
)
idx
=
torch
.
linspace
(
0
,
total_frames
-
1
,
nframes
).
round
().
long
().
tolist
()
idx
=
torch
.
linspace
(
0
,
total_frames
-
1
,
nframes
).
round
().
long
().
tolist
()
video
=
vr
.
get_batch
(
idx
)
video
=
vr
.
get_batch
(
idx
)
.
asnumpy
()
video
=
torch
.
tensor
(
video
).
permute
(
0
,
3
,
1
,
2
)
# Convert to TCHW format
video
=
torch
.
tensor
(
video
).
permute
(
0
,
3
,
1
,
2
)
# Convert to TCHW format
nframes
,
_
,
height
,
width
=
video
.
shape
nframes
,
_
,
height
,
width
=
video
.
shape
min_pixels
=
ele
.
get
(
"min_pixels"
,
VIDEO_MIN_PIXELS
)
min_pixels
=
ele
.
get
(
"min_pixels"
,
VIDEO_MIN_PIXELS
)
...
...
python/sglang/srt/utils.py
View file @
497efe74
...
@@ -84,7 +84,6 @@ from torch.library import Library
...
@@ -84,7 +84,6 @@ from torch.library import Library
from
torch.profiler
import
ProfilerActivity
,
profile
,
record_function
from
torch.profiler
import
ProfilerActivity
,
profile
,
record_function
from
torch.utils._contextlib
import
_DecoratorContextManager
from
torch.utils._contextlib
import
_DecoratorContextManager
from
triton.runtime.cache
import
FileCacheManager
from
triton.runtime.cache
import
FileCacheManager
from
video_reader
import
PyVideoReader
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -758,9 +757,16 @@ def load_image(
...
@@ -758,9 +757,16 @@ def load_image(
def
load_video
(
video_file
:
Union
[
str
,
bytes
],
use_gpu
:
bool
=
True
):
def
load_video
(
video_file
:
Union
[
str
,
bytes
],
use_gpu
:
bool
=
True
):
# We import decord here to avoid a strange Segmentation fault (core dumped) issue.
# We import decord here to avoid a strange Segmentation fault (core dumped) issue.
from
video_reader
import
PyVideoReader
from
decord
import
VideoReader
,
cpu
,
gpu
try
:
from
decord.bridge
import
decord_bridge
ctx
=
gpu
(
0
)
_
=
decord_bridge
.
get_ctx_device
(
ctx
)
except
Exception
:
ctx
=
cpu
(
0
)
device
=
"cuda"
if
use_gpu
and
torch
.
cuda
.
is_available
()
else
None
tmp_file
=
None
tmp_file
=
None
vr
=
None
vr
=
None
try
:
try
:
...
@@ -768,7 +774,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
...
@@ -768,7 +774,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
.
write
(
video_file
)
tmp_file
.
write
(
video_file
)
tmp_file
.
close
()
tmp_file
.
close
()
vr
=
Py
VideoReader
(
tmp_file
.
name
,
device
=
device
,
threads
=
0
)
vr
=
VideoReader
(
tmp_file
.
name
,
ctx
=
ctx
)
elif
isinstance
(
video_file
,
str
):
elif
isinstance
(
video_file
,
str
):
if
video_file
.
startswith
((
"http://"
,
"https://"
)):
if
video_file
.
startswith
((
"http://"
,
"https://"
)):
timeout
=
int
(
os
.
getenv
(
"REQUEST_TIMEOUT"
,
"10"
))
timeout
=
int
(
os
.
getenv
(
"REQUEST_TIMEOUT"
,
"10"
))
...
@@ -778,22 +784,22 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
...
@@ -778,22 +784,22 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
for
chunk
in
response
.
iter_content
(
chunk_size
=
8192
):
for
chunk
in
response
.
iter_content
(
chunk_size
=
8192
):
tmp_file
.
write
(
chunk
)
tmp_file
.
write
(
chunk
)
tmp_file
.
close
()
tmp_file
.
close
()
vr
=
Py
VideoReader
(
tmp_file
.
name
,
device
=
device
,
threads
=
0
)
vr
=
VideoReader
(
tmp_file
.
name
,
ctx
=
ctx
)
elif
video_file
.
startswith
(
"data:"
):
elif
video_file
.
startswith
(
"data:"
):
_
,
encoded
=
video_file
.
split
(
","
,
1
)
_
,
encoded
=
video_file
.
split
(
","
,
1
)
video_bytes
=
base64
.
b64decode
(
encoded
)
video_bytes
=
base64
.
b64decode
(
encoded
)
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
.
write
(
video_bytes
)
tmp_file
.
write
(
video_bytes
)
tmp_file
.
close
()
tmp_file
.
close
()
vr
=
Py
VideoReader
(
tmp_file
.
name
,
device
=
device
,
threads
=
0
)
vr
=
VideoReader
(
tmp_file
.
name
,
ctx
=
ctx
)
elif
os
.
path
.
isfile
(
video_file
):
elif
os
.
path
.
isfile
(
video_file
):
vr
=
Py
VideoReader
(
video_file
,
device
=
device
,
threads
=
0
)
vr
=
VideoReader
(
video_file
,
ctx
=
ctx
)
else
:
else
:
video_bytes
=
base64
.
b64decode
(
video_file
)
video_bytes
=
base64
.
b64decode
(
video_file
)
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp4"
)
tmp_file
.
write
(
video_bytes
)
tmp_file
.
write
(
video_bytes
)
tmp_file
.
close
()
tmp_file
.
close
()
vr
=
Py
VideoReader
(
tmp_file
.
name
,
device
=
device
,
threads
=
0
)
vr
=
VideoReader
(
tmp_file
.
name
,
ctx
=
ctx
)
else
:
else
:
raise
ValueError
(
f
"Unsupported video input type:
{
type
(
video_file
)
}
"
)
raise
ValueError
(
f
"Unsupported video input type:
{
type
(
video_file
)
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment