Unverified commit f3440adc, authored by Xinyuan Tong, committed by GitHub
Browse files

vlm: enable GLM4.1V server testing & fix video processing (#10095)


Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
parent 5a7e10fe
...@@ -2,7 +2,6 @@ import re ...@@ -2,7 +2,6 @@ import re
from typing import List, Union from typing import List, Union
from decord import VideoReader from decord import VideoReader
from transformers.video_utils import VideoMetadata
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration from sglang.srt.models.glm4v import Glm4vForConditionalGeneration
...@@ -66,17 +65,18 @@ class Glm4vImageProcessor(SGLangBaseProcessor): ...@@ -66,17 +65,18 @@ class Glm4vImageProcessor(SGLangBaseProcessor):
total_num_frames = len(vr) total_num_frames = len(vr)
duration = total_num_frames / video_fps if video_fps else 0 duration = total_num_frames / video_fps if video_fps else 0
metadata = VideoMetadata(
total_num_frames=int(total_num_frames),
fps=float(video_fps),
duration=float(duration),
video_backend="decord",
)
# Extract all frames # Extract all frames
indices = list(range(total_num_frames)) indices = list(range(total_num_frames))
frames = vr.get_batch(indices).asnumpy() frames = vr.get_batch(indices).asnumpy()
metadata.frames_indices = indices
# Return metadata as dict so transformers can properly create VideoMetadata objects
metadata = {
"total_num_frames": int(total_num_frames),
"fps": float(video_fps),
"duration": float(duration),
"video_backend": "decord",
"frames_indices": indices,
}
return frames, metadata return frames, metadata
......
...@@ -217,31 +217,27 @@ class TestKimiVLServer(ImageOpenAITestMixin): ...@@ -217,31 +217,27 @@ class TestKimiVLServer(ImageOpenAITestMixin):
pass pass
# Skip for ci test class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
# class TestGLM41VServer(TestOpenAIVisionServer): @classmethod
# @classmethod def setUpClass(cls):
# def setUpClass(cls): cls.model = "zai-org/GLM-4.1V-9B-Thinking"
# cls.model = "zai-org/GLM-4.1V-9B-Thinking" cls.base_url = DEFAULT_URL_FOR_TEST
# cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456"
# cls.api_key = "sk-123456" cls.process = popen_launch_server(
# cls.process = popen_launch_server( cls.model,
# cls.model, cls.base_url,
# cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=[
# other_args=[ "--trust-remote-code",
# "--trust-remote-code", "--mem-fraction-static",
# "--mem-fraction-static", "0.68",
# "0.68", "--cuda-graph-max-bs",
# "--cuda-graph-max-bs", "4",
# "4", "--reasoning-parser",
# "--reasoning-parser", "glm45",
# "glm45", ],
# ], )
# ) cls.base_url += "/v1"
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# self._test_video_chat_completion()
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment