Unverified commit 02723e1b, authored by Mick, committed by GitHub
Browse files

CI: rewrite test_vision_chunked_prefill to speedup (#5682)

parent df2cf583
...@@ -69,7 +69,7 @@ suites = { ...@@ -69,7 +69,7 @@ suites = {
TestFile("test_update_weights_from_disk.py", 114), TestFile("test_update_weights_from_disk.py", 114),
TestFile("test_update_weights_from_tensor.py", 48), TestFile("test_update_weights_from_tensor.py", 48),
TestFile("test_vertex_endpoint.py", 31), TestFile("test_vertex_endpoint.py", 31),
TestFile("test_vision_chunked_prefill.py", 223), TestFile("test_vision_chunked_prefill.py", 99),
TestFile("test_vlm_accuracy.py", 60), TestFile("test_vlm_accuracy.py", 60),
TestFile("test_vision_openai_server.py", 537), TestFile("test_vision_openai_server.py", 537),
TestFile("test_fim_completion.py", 40), TestFile("test_fim_completion.py", 40),
......
...@@ -24,6 +24,7 @@ from sglang.test.test_utils import ( ...@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
class TestVisionChunkedPrefill(CustomTestCase): class TestVisionChunkedPrefill(CustomTestCase):
def prepare_video_messages(self, video_path, max_frames_num=8): def prepare_video_messages(self, video_path, max_frames_num=8):
# We import decord here to avoid a strange Segmentation fault (core dumped) issue. # We import decord here to avoid a strange Segmentation fault (core dumped) issue.
# The following import order will cause Segmentation fault. # The following import order will cause Segmentation fault.
...@@ -128,7 +129,7 @@ class TestVisionChunkedPrefill(CustomTestCase): ...@@ -128,7 +129,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
return responses return responses
def run_generate(self, chunked_prefill_size, batch, num_frame): def launch_server(self, chunked_prefill_size) -> int:
# launch server # launch server
model = "lmms-lab/llava-onevision-qwen2-7b-ov" model = "lmms-lab/llava-onevision-qwen2-7b-ov"
# model = "meta-llama/Llama-3.2-11B-Vision-Instruct" # model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
...@@ -142,38 +143,46 @@ class TestVisionChunkedPrefill(CustomTestCase): ...@@ -142,38 +143,46 @@ class TestVisionChunkedPrefill(CustomTestCase):
f"{chunked_prefill_size}", f"{chunked_prefill_size}",
], ],
) )
return process.pid
def _test_chunked_prefill(self, batches, num_frames):
# Chunked
try: try:
return self.generate_for_video(batch, num_frame) chunked_server_pid = self.launch_server(chunked_prefill_size=1024)
outputs_chunked = []
for batch, num_frame in zip(batches, num_frames):
output_chunked = self.generate_for_video(
batch=batch, num_frame=num_frame
)
outputs_chunked += [output_chunked]
finally: finally:
kill_process_tree(process.pid) kill_process_tree(chunked_server_pid)
def test_chunked_prefill(self): # None-chunked
output_chunked = self.run_generate( try:
chunked_prefill_size=1024, batch=False, num_frame=1 no_chunked_server_pid = self.launch_server(chunked_prefill_size=-1)
) outputs_no_chunked = []
output_no_chunked = self.run_generate( for batch, num_frame in zip(batches, num_frames):
chunked_prefill_size=-1, batch=False, num_frame=1 output_no_chunked = self.generate_for_video(
batch=batch, num_frame=num_frame
) )
outputs_no_chunked += [output_no_chunked]
print("output with chunked prefill:") finally:
print(output_chunked) kill_process_tree(no_chunked_server_pid)
print("output without chunked prefill:")
print(output_no_chunked)
assert output_chunked == output_no_chunked
output_chunked = self.run_generate(
chunked_prefill_size=1024, batch=True, num_frame=[2, 6, 8, 10]
)
output_no_chunked = self.run_generate(
chunked_prefill_size=-1, batch=True, num_frame=[2, 6, 8, 10]
)
for output_chunked, output_no_chunked in zip(
outputs_chunked, outputs_no_chunked
):
print("output with chunked prefill:") print("output with chunked prefill:")
print(output_chunked) print(output_chunked)
print("output without chunked prefill:") print("output without chunked prefill:")
print(output_no_chunked) print(output_no_chunked)
assert output_chunked == output_no_chunked assert output_chunked == output_no_chunked
def test_chunked_prefill(self):
self._test_chunked_prefill(batches=[False, True], num_frames=[1, [2, 6, 8, 10]])
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment