Unverified commit eddaafc1, authored by Roger Wang, committed by GitHub
Browse files

[Multimodal] Improve max video embedding length estimation in V1 (#24312)


Signed-off-by: Roger Wang <hey@rogerw.me>
Co-authored-by: Roger Wang <hey@rogerw.me>
parent 305a1cc0
@@ -216,12 +216,9 @@ class LlavaOnevisionProcessingInfo(LlavaNextProcessingInfo):
         seq_len: int,
         mm_counts: Mapping[str, int],
     ) -> int:
-        max_images = mm_counts.get("image", 0)
         max_videos = mm_counts.get("video", 0)
-        max_image_tokens = self.get_max_image_tokens() * max_images
-        max_total_frames = self._get_max_video_frames(seq_len -
-                                                       max_image_tokens)
+        max_total_frames = self._get_max_video_frames(seq_len)
         max_frames_per_video = min(max_total_frames // max(max_videos, 1),
                                    _MAX_FRAMES_PER_VIDEO)
......
@@ -915,12 +915,9 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
         seq_len: int,
         mm_counts: Mapping[str, int],
     ) -> int:
-        max_images = mm_counts.get("image", 0)
         max_videos = mm_counts.get("video", 0)
-        max_image_tokens = self.get_max_image_tokens() * max_images
-        max_total_frames = self._get_max_video_frames(seq_len -
-                                                       max_image_tokens)
+        max_total_frames = self._get_max_video_frames(seq_len)
         max_frames_per_video = min(max_total_frames // max(max_videos, 1),
                                    _MAX_FRAMES_PER_VIDEO)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment