Unverified commit 1078396f, authored by Ke Bao and committed by GitHub
Browse files

Update deps for mllama4 (#5215)

parent 7e4f72dd
......@@ -38,7 +38,7 @@ runtime_common = [
"pyzmq>=25.1.2",
"soundfile==0.13.1",
"torchao>=0.7.0",
"transformers==4.51.0",
"transformers==4.51.1",
"uvicorn",
"uvloop",
"compressed-tensors",
......@@ -50,6 +50,7 @@ srt = [
"sgl-kernel==0.0.8",
"flashinfer_python==0.2.3",
"torch==2.5.1",
"torchvision==0.20.1",
"cuda-python",
"outlines>=0.0.44,<=0.1.11",
"partial_json_parser",
......
......@@ -840,7 +840,6 @@ class Scheduler(
bootstrap_room=recv_req.bootstrap_room,
)
req.tokenizer = self.tokenizer
req.queue_time_start = time.time()
if (
recv_req.session_params is not None
......@@ -855,7 +854,6 @@ class Scheduler(
# Create a new request from a previous session
session = self.sessions[recv_req.session_params.id]
req = session.create_req(recv_req, self.tokenizer)
req.queue_time_start = time.time()
if isinstance(req.finished_reason, FINISH_ABORT):
self._add_request_to_queue(req)
return
......@@ -958,6 +956,7 @@ class Scheduler(
self.disagg_decode_prealloc_queue.add(req)
else:
req.queue_time_start = time.time()
self.waiting_queue.append(req)
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
......
......@@ -682,29 +682,30 @@ class TestJanusProServer(TestOpenAIVisionServer):
pass
class TestLlama4Server(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--chat-template",
"llama-4",
"--mem-fraction-static",
"0.8",
"--tp-size=8",
"--context-length=8192",
],
)
cls.base_url += "/v1"
def test_video_chat_completion(self):
pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# pass
class TestGemma3itServer(TestOpenAIVisionServer):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment