Unverified commit b4408e60 authored by Lianmin Zheng, committed by GitHub

Revert "fix: fix video input for qwen3-vl" (#11437)

parent 52fcbbb8
@@ -1126,11 +1126,6 @@ class MRotaryEmbedding(RotaryEmbedding):
         second_per_grid_ts: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        if model_type.startswith("qwen3_vl") and video_grid_thw is not None:
-            video_grid_thw = torch.repeat_interleave(
-                video_grid_thw, video_grid_thw[:, 0], dim=0
-            )
-            video_grid_thw[:, 0] = 1
         mrope_position_deltas = []
         if input_ids is not None and (
             image_grid_thw is not None or video_grid_thw is not None
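For context on the first reverted hunk: the removed block expanded each video's (t, h, w) grid into t per-frame (1, h, w) entries before the mrope positions were computed. Below is a minimal, self-contained sketch of that transformation; it is not part of the diff, and the shapes and values are made up for illustration.

import torch

# Hypothetical input: one 4-frame and one 2-frame video, each with a 6x8 patch grid.
video_grid_thw = torch.tensor([[4, 6, 8],
                               [2, 6, 8]])

# Repeat each row t times, then force t = 1, so every frame becomes its own
# (1, h, w) entry -- this mirrors what the reverted lines did.
expanded = torch.repeat_interleave(video_grid_thw, video_grid_thw[:, 0], dim=0)
expanded[:, 0] = 1
print(expanded.tolist())  # [[1, 6, 8]] repeated 6 times (4 + 2 frames)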
@@ -186,6 +186,7 @@ UNBALANCED_MODEL_LOADING_TIMEOUT_S = 300
 logger = logging.getLogger(__name__)
 if _is_npu:
     import torch_npu
@@ -624,22 +625,6 @@ class ModelRunner:
                 "Setting hicache_io_backend to vanilla I/O, which may lead to suboptimal performance with small page sizes."
             )
-        if self.model_config.hf_config.model_type == "qwen3_vl_moe":
-            if (
-                quantization_config := getattr(
-                    self.model_config.hf_config, "quantization_config"
-                )
-            ) is not None:
-                text_config = self.model_config.hf_text_config
-                weight_block_size_n = quantization_config["weight_block_size"][0]
-                if (
-                    text_config.moe_intermediate_size
-                    // (self.tp_size // self.moe_ep_size)
-                ) % weight_block_size_n != 0:
-                    raise ValueError(
-                        f"For qwen3-vl-fp8 models, please make sure ({text_config.moe_intermediate_size=} // ({self.tp_size=} // {self.moe_ep_size=})) % {weight_block_size_n=} == 0"
-                    )
     def init_torch_distributed(self):
         logger.info("Init torch distributed begin.")
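For context on the second reverted hunk: the removed check required that, for FP8 qwen3_vl_moe checkpoints, the per-rank shard of the MoE intermediate dimension be a whole multiple of the quantization block size, presumably so block-wise FP8 weight scales align with shard boundaries. A rough standalone sketch of that arithmetic follows, with assumed illustrative numbers that are not taken from any real config.

# Hypothetical values for illustration only.
moe_intermediate_size = 1536   # text_config.moe_intermediate_size
tp_size = 8                    # tensor-parallel world size
moe_ep_size = 2                # expert-parallel size
weight_block_size_n = 128      # quantization_config["weight_block_size"][0]

# Each TP rank within an EP group holds a shard of the intermediate dimension.
shard = moe_intermediate_size // (tp_size // moe_ep_size)  # 1536 // 4 = 384
assert shard % weight_block_size_n == 0, (
    "shard must be a whole number of FP8 quantization blocks"
)  # 384 % 128 == 0, so this hypothetical config would have passed the removed check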
@@ -50,27 +50,6 @@ class TestQwen2VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
         cls.base_url += "/v1"
-class TestQwen3VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = "Qwen/Qwen3-VL-30B-A3B-Instruct"
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.api_key = "sk-123456"
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            api_key=cls.api_key,
-            other_args=[
-                "--mem-fraction-static",
-                "0.80",
-                "--cuda-graph-max-bs",
-                "4",
-            ],
-        )
-        cls.base_url += "/v1"
 class TestQwen2_5_VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
     @classmethod
     def setUpClass(cls):
@@ -494,7 +494,7 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
             **(self.get_vision_request_kwargs()),
         )
-        video_response = response.choices[0].message.content.lower()
+        video_response = response.choices[0].message.content
         print("-" * 30)
         print(f"Video response:\n{video_response}")
@@ -502,10 +502,9 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
         # Add assertions to validate the video response
         assert (
-            "ipod" in video_response
+            "iPod" in video_response
             or "device" in video_response
             or "microphone" in video_response
-            or "phone" in video_response
         ), f"video_response: {video_response}, should contain 'iPod' or 'device'"
         assert (
             "man" in video_response
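For context on the test changes above: the reverted fix lower-cased the model response and matched lowercase keywords (and also accepted "phone"), while the revert restores case-sensitive matching against "iPod". A minimal sketch of the behavioral difference, using made-up response text rather than real model output:

# Hypothetical model output, not from the test suite.
response_text = "The video shows a man holding an ipod-like device."

case_sensitive_hit = "iPod" in response_text             # False: casing must match exactly
case_insensitive_hit = "ipod" in response_text.lower()   # True: matching after .lower()
print(case_sensitive_hit, case_insensitive_hit)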