Unverified commit 1078396f, authored by Ke Bao and committed by GitHub
Browse files

Update deps for mllama4 (#5215)

parent 7e4f72dd
...@@ -38,7 +38,7 @@ runtime_common = [ ...@@ -38,7 +38,7 @@ runtime_common = [
"pyzmq>=25.1.2", "pyzmq>=25.1.2",
"soundfile==0.13.1", "soundfile==0.13.1",
"torchao>=0.7.0", "torchao>=0.7.0",
"transformers==4.51.0", "transformers==4.51.1",
"uvicorn", "uvicorn",
"uvloop", "uvloop",
"compressed-tensors", "compressed-tensors",
...@@ -50,6 +50,7 @@ srt = [ ...@@ -50,6 +50,7 @@ srt = [
"sgl-kernel==0.0.8", "sgl-kernel==0.0.8",
"flashinfer_python==0.2.3", "flashinfer_python==0.2.3",
"torch==2.5.1", "torch==2.5.1",
"torchvision==0.20.1",
"cuda-python", "cuda-python",
"outlines>=0.0.44,<=0.1.11", "outlines>=0.0.44,<=0.1.11",
"partial_json_parser", "partial_json_parser",
......
...@@ -840,7 +840,6 @@ class Scheduler( ...@@ -840,7 +840,6 @@ class Scheduler(
bootstrap_room=recv_req.bootstrap_room, bootstrap_room=recv_req.bootstrap_room,
) )
req.tokenizer = self.tokenizer req.tokenizer = self.tokenizer
req.queue_time_start = time.time()
if ( if (
recv_req.session_params is not None recv_req.session_params is not None
...@@ -855,7 +854,6 @@ class Scheduler( ...@@ -855,7 +854,6 @@ class Scheduler(
# Create a new request from a previous session # Create a new request from a previous session
session = self.sessions[recv_req.session_params.id] session = self.sessions[recv_req.session_params.id]
req = session.create_req(recv_req, self.tokenizer) req = session.create_req(recv_req, self.tokenizer)
req.queue_time_start = time.time()
if isinstance(req.finished_reason, FINISH_ABORT): if isinstance(req.finished_reason, FINISH_ABORT):
self._add_request_to_queue(req) self._add_request_to_queue(req)
return return
...@@ -958,6 +956,7 @@ class Scheduler( ...@@ -958,6 +956,7 @@ class Scheduler(
self.disagg_decode_prealloc_queue.add(req) self.disagg_decode_prealloc_queue.add(req)
else: else:
req.queue_time_start = time.time()
self.waiting_queue.append(req) self.waiting_queue.append(req)
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False): def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
......
...@@ -682,29 +682,30 @@ class TestJanusProServer(TestOpenAIVisionServer): ...@@ -682,29 +682,30 @@ class TestJanusProServer(TestOpenAIVisionServer):
pass pass
class TestLlama4Server(TestOpenAIVisionServer): ## Skip for ci test
@classmethod # class TestLlama4Server(TestOpenAIVisionServer):
def setUpClass(cls): # @classmethod
cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" # def setUpClass(cls):
cls.base_url = DEFAULT_URL_FOR_TEST # cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
cls.api_key = "sk-123456" # cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( # cls.api_key = "sk-123456"
cls.model, # cls.process = popen_launch_server(
cls.base_url, # cls.model,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, # cls.base_url,
other_args=[ # timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
"--chat-template", # other_args=[
"llama-4", # "--chat-template",
"--mem-fraction-static", # "llama-4",
"0.8", # "--mem-fraction-static",
"--tp-size=8", # "0.8",
"--context-length=8192", # "--tp-size=8",
], # "--context-length=8192",
) # ],
cls.base_url += "/v1" # )
# cls.base_url += "/v1"
def test_video_chat_completion(self):
pass # def test_video_chat_completion(self):
# pass
class TestGemma3itServer(TestOpenAIVisionServer): class TestGemma3itServer(TestOpenAIVisionServer):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment