Unverified Commit f11481b9 authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Add 4-GPU runner tests and split existing tests (#6383)

parent 9d24c3ff
...@@ -89,6 +89,25 @@ jobs: ...@@ -89,6 +89,25 @@ jobs:
cd test/srt cd test/srt
python3 run_suite.py --suite per-commit-2-gpu python3 run_suite.py --suite per-commit-2-gpu
# Job that runs the "per-commit-4-gpu" suite from test/srt on the 4-gpu-runner machine.
unittest-test-backend-4-gpu:
# Run for the sgl-project/sglang repository or for any pull_request event, but never for a draft pull request.
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
# Start only after the frontend and 2-GPU backend jobs have completed.
needs: [unit-test-frontend, unit-test-backend-2-gpu]
runs-on: 4-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
bash scripts/ci_install_dependency.sh
- name: Run test
# Upper bound, in minutes, on how long this step may run.
timeout-minutes: 20
run: |
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu
unittest-test-backend-8-gpu: unittest-test-backend-8-gpu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false github.event.pull_request.draft == false
......
...@@ -81,7 +81,8 @@ suites = { ...@@ -81,7 +81,8 @@ suites = {
TestFile("test_vertex_endpoint.py", 31), TestFile("test_vertex_endpoint.py", 31),
TestFile("test_vision_chunked_prefill.py", 175), TestFile("test_vision_chunked_prefill.py", 175),
TestFile("test_vlm_accuracy.py", 60), TestFile("test_vlm_accuracy.py", 60),
TestFile("test_vision_openai_server.py", 637), TestFile("test_vision_openai_server_a.py", 700),
TestFile("test_vision_openai_server_b.py", 700),
TestFile("test_w8a8_quantization.py", 46), TestFile("test_w8a8_quantization.py", 46),
TestFile("models/lora/test_lora_cuda_graph.py", 250), TestFile("models/lora/test_lora_cuda_graph.py", 250),
], ],
...@@ -104,17 +105,19 @@ suites = { ...@@ -104,17 +105,19 @@ suites = {
"per-commit-2-gpu-amd": [ "per-commit-2-gpu-amd": [
TestFile("test_mla_tp.py", 170), TestFile("test_mla_tp.py", 170),
], ],
"per-commit-4-gpu": [
TestFile("test_local_attn.py", 250),
TestFile("test_pp_single_node.py", 150),
],
"per-commit-8-gpu": [ "per-commit-8-gpu": [
# Disabled deepep tests temporarily because it takes too much time. # Disabled deepep tests temporarily because it takes too much time.
# TODO: re-enable them after reducing the test time with compilation cache and smaller models. # TODO: re-enable them after reducing the test time with compilation cache and smaller models.
# TestFile("test_deepep_intranode.py", 50), # TestFile("test_deepep_intranode.py", 50),
# TestFile("test_deepep_low_latency.py", 50), # TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250), # TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
TestFile("test_disaggregation.py", 210), # TestFile("test_disaggregation.py", 210), # disabled since we have different_tp test
TestFile("test_local_attn.py", 250),
TestFile("test_disaggregation_different_tp.py", 210), TestFile("test_disaggregation_different_tp.py", 210),
TestFile("test_full_deepseek_v3.py", 250), TestFile("test_full_deepseek_v3.py", 250),
TestFile("test_pp_single_node.py", 150),
], ],
"per-commit-8-gpu-amd": [ "per-commit-8-gpu-amd": [
TestFile("test_full_deepseek_v3.py", 250), TestFile("test_full_deepseek_v3.py", 250),
......
...@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase): ...@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase):
"--tp-size", "--tp-size",
2, 2,
"--pp-size", "--pp-size",
4, 2,
"--chunked-prefill-size", "--chunked-prefill-size",
256, 256,
], ],
......
"""
Usage:
python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_mixed_batch
python3 -m unittest test_vision_openai_server_a.TestQwen2VLServer.test_multi_images_chat_completion
"""
from test_vision_openai_server_common import *
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestQwen2VLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against Qwen/Qwen2-VL-7B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "Qwen/Qwen2-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against Qwen/Qwen2.5-VL-7B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestVLMContextLengthIssue(CustomTestCase):
    """Checks that an image request is rejected when the server's context length is 300."""

    @classmethod
    def setUpClass(cls):
        """Launch Qwen/Qwen2-VL-7B-Instruct with ``--context-length 300``."""
        cls.model = "Qwen/Qwen2-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=[
                "--context-length",
                "300",
                "--mem-fraction-static=0.80",
            ],
        )
        # The launcher received the URL without the suffix; the client below uses "/v1".
        cls.base_url += "/v1"

    @classmethod
    def tearDownClass(cls):
        """Kill the process tree of the server started in ``setUpClass``."""
        kill_process_tree(cls.process.pid)

    def test_single_image_chat_completion(self):
        # No docstring on purpose: unittest would print it as the test description.
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        with self.assertRaises(openai.BadRequestError) as cm:
            client.chat.completions.create(
                model="default",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": IMAGE_MAN_IRONING_URL},
                            },
                            {
                                "type": "text",
                                "text": "Give a lengthy description of this picture",
                            },
                        ],
                    },
                ],
                temperature=0,
            )

        # context length is checked first, then max_req_input_len, which is calculated from the former
        error_text = str(cm.exception)
        accepted_fragments = (
            "Multimodal prompt is too long after expanding multimodal tokens.",
            "is longer than the model's context length",
        )
        # A unittest assertion (rather than a bare ``assert``) still runs under
        # ``python -O`` and reports the actual message when the check fails.
        self.assertTrue(
            any(fragment in error_text for fragment in accepted_fragments),
            msg=f"Unexpected BadRequestError message: {error_text}",
        )
class TestMllamaServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against meta-llama/Llama-3.2-11B-Vision-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server with the class API key, without passing ``other_args``."""
        cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        launch_options = {
            "timeout": DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            "api_key": cls.api_key,
        }
        cls.process = popen_launch_server(cls.model, cls.base_url, **launch_options)

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestMinicpmvServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against openbmb/MiniCPM-V-2_6."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "openbmb/MiniCPM-V-2_6"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestInternVL2_5Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against OpenGVLab/InternVL2_5-2B."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "OpenGVLab/InternVL2_5-2B"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestMinicpmoServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against openbmb/MiniCPM-o-2_6, including audio."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "openbmb/MiniCPM-o-2_6"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.7"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_audio_chat_completion(self):
        # Speech check first, then the ambient-audio check.
        self._test_audio_speech_completion()
        self._test_audio_ambient_completion()
if __name__ == "__main__":
unittest.main()
from test_vision_openai_server_common import *
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestPixtralServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against mistral-community/pixtral-12b."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "mistral-community/pixtral-12b"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.73"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestMistral3_1Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against unsloth/Mistral-Small-3.1-24B-Instruct-2503."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.8"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestDeepseekVL2Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/deepseek-vl2-small."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/deepseek-vl2-small"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--context-length", "4096"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/deepseek-vl2-tiny."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/deepseek-vl2-tiny"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--context-length", "4096"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestJanusProServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/Janus-Pro-7B."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/Janus-Pro-7B"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass

    def test_single_image_chat_completion(self):
        # Skip this test because it is flaky
        pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# pass
class TestGemma3itServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against google/gemma-3-4b-it."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "google/gemma-3-4b-it"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = [
            "--trust-remote-code",
            "--mem-fraction-static",
            "0.75",
            "--enable-multimodal",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestKimiVLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against moonshotai/Kimi-VL-A3B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = [
            "--trust-remote-code",
            "--context-length",
            "4096",
            "--dtype",
            "bfloat16",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
if __name__ == "__main__":
unittest.main()
"""
Usage:
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
"""
import base64 import base64
import io import io
import json import json
...@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase): ...@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase):
def test_audio_chat_completion(self): def test_audio_chat_completion(self):
pass pass
class TestQwen2VLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against Qwen/Qwen2-VL-7B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "Qwen/Qwen2-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against Qwen/Qwen2.5-VL-7B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestVLMContextLengthIssue(CustomTestCase):
    """Checks that an image request is rejected when the server's context length is 300."""

    @classmethod
    def setUpClass(cls):
        """Launch Qwen/Qwen2-VL-7B-Instruct with ``--context-length 300``."""
        cls.model = "Qwen/Qwen2-VL-7B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=[
                "--context-length",
                "300",
                "--mem-fraction-static=0.80",
            ],
        )
        # The launcher received the URL without the suffix; the client below uses "/v1".
        cls.base_url += "/v1"

    @classmethod
    def tearDownClass(cls):
        """Kill the process tree of the server started in ``setUpClass``."""
        kill_process_tree(cls.process.pid)

    def test_single_image_chat_completion(self):
        # No docstring on purpose: unittest would print it as the test description.
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        with self.assertRaises(openai.BadRequestError) as cm:
            client.chat.completions.create(
                model="default",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": IMAGE_MAN_IRONING_URL},
                            },
                            {
                                "type": "text",
                                "text": "Give a lengthy description of this picture",
                            },
                        ],
                    },
                ],
                temperature=0,
            )

        # context length is checked first, then max_req_input_len, which is calculated from the former
        error_text = str(cm.exception)
        accepted_fragments = (
            "Multimodal prompt is too long after expanding multimodal tokens.",
            "is longer than the model's context length",
        )
        # A unittest assertion (rather than a bare ``assert``) still runs under
        # ``python -O`` and reports the actual message when the check fails.
        self.assertTrue(
            any(fragment in error_text for fragment in accepted_fragments),
            msg=f"Unexpected BadRequestError message: {error_text}",
        )
class TestMllamaServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against meta-llama/Llama-3.2-11B-Vision-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server with the class API key, without passing ``other_args``."""
        cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        launch_options = {
            "timeout": DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            "api_key": cls.api_key,
        }
        cls.process = popen_launch_server(cls.model, cls.base_url, **launch_options)

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestMinicpmvServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against openbmb/MiniCPM-V-2_6."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "openbmb/MiniCPM-V-2_6"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestInternVL2_5Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against OpenGVLab/InternVL2_5-2B."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "OpenGVLab/InternVL2_5-2B"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"
class TestMinicpmoServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against openbmb/MiniCPM-o-2_6, including audio."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "openbmb/MiniCPM-o-2_6"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.7"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_audio_chat_completion(self):
        # Speech check first, then the ambient-audio check.
        self._test_audio_speech_completion()
        self._test_audio_ambient_completion()
class TestPixtralServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against mistral-community/pixtral-12b."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "mistral-community/pixtral-12b"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.73"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestMistral3_1Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against unsloth/Mistral-Small-3.1-24B-Instruct-2503."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.8"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestDeepseekVL2Server(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/deepseek-vl2-small."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/deepseek-vl2-small"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--context-length", "4096"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/deepseek-vl2-tiny."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/deepseek-vl2-tiny"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--context-length", "4096"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestJanusProServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against deepseek-ai/Janus-Pro-7B."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "deepseek-ai/Janus-Pro-7B"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = ["--trust-remote-code", "--mem-fraction-static", "0.4"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass

    def test_single_image_chat_completion(self):
        # Skip this test because it is flaky
        pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# pass
class TestGemma3itServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against google/gemma-3-4b-it."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "google/gemma-3-4b-it"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = [
            "--trust-remote-code",
            "--mem-fraction-static",
            "0.75",
            "--enable-multimodal",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
class TestKimiVLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer cases run against moonshotai/Kimi-VL-A3B-Instruct."""

    @classmethod
    def setUpClass(cls):
        """Launch the server for this class and keep its process handle on ``cls``."""
        cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # NOTE(review): the key is stored on the class but not forwarded to the launcher.
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launcher.
        server_flags = [
            "--trust-remote-code",
            "--context-length",
            "4096",
            "--dtype",
            "bfloat16",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

        # The launcher received the URL without the suffix; add "/v1" afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        # Intentionally empty: the video test is a no-op for this class.
        pass
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment