Unverified Commit 1605ae12 authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

[CI] Minor fix for CI (#2187)

parent 1aea19f6
...@@ -17,12 +17,12 @@ dependencies = ["requests", "tqdm", "numpy", "IPython"] ...@@ -17,12 +17,12 @@ dependencies = ["requests", "tqdm", "numpy", "IPython"]
[project.optional-dependencies] [project.optional-dependencies]
runtime_common = ["aiohttp", "decord", "fastapi", runtime_common = ["aiohttp", "decord", "fastapi",
"hf_transfer", "huggingface_hub", "interegular", "hf_transfer", "huggingface_hub", "interegular", "modelscope",
"orjson", "outlines>=0.0.44,<0.1.0", "orjson", "outlines>=0.0.44,<0.1.0",
"packaging", "pillow", "prometheus-client>=0.20.0", "packaging", "pillow", "prometheus-client>=0.20.0",
"psutil", "pydantic", "python-multipart", "psutil", "pydantic", "python-multipart",
"pyzmq>=25.1.2", "torchao", "uvicorn", "uvloop", "pyzmq>=25.1.2", "torchao", "uvicorn", "uvloop",
"modelscope", "xgrammar==0.1.4"] "xgrammar>=0.1.4"]
srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1"] srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1"]
# HIP (Heterogeneous-computing Interface for Portability) for AMD # HIP (Heterogeneous-computing Interface for Portability) for AMD
......
...@@ -526,7 +526,7 @@ class Scheduler: ...@@ -526,7 +526,7 @@ class Scheduler:
recv_req: TokenizedGenerateReqInput, recv_req: TokenizedGenerateReqInput,
): ):
if recv_req.session_id is None or recv_req.session_id not in self.sessions: if recv_req.session_id is None or recv_req.session_id not in self.sessions:
# Check if input_embeds is present and create dummy input_ids # Create a new request
if recv_req.input_embeds is not None: if recv_req.input_embeds is not None:
# Generate fake input_ids based on the length of input_embeds # Generate fake input_ids based on the length of input_embeds
seq_length = len(recv_req.input_embeds) seq_length = len(recv_req.input_embeds)
...@@ -542,6 +542,7 @@ class Scheduler: ...@@ -542,6 +542,7 @@ class Scheduler:
input_embeds=recv_req.input_embeds, input_embeds=recv_req.input_embeds,
) )
req.tokenizer = self.tokenizer req.tokenizer = self.tokenizer
if recv_req.session_id is not None: if recv_req.session_id is not None:
req.finished_reason = FINISH_ABORT( req.finished_reason = FINISH_ABORT(
f"Invalid request: session id {recv_req.session_id} does not exist" f"Invalid request: session id {recv_req.session_id} does not exist"
...@@ -549,7 +550,7 @@ class Scheduler: ...@@ -549,7 +550,7 @@ class Scheduler:
self.waiting_queue.append(req) self.waiting_queue.append(req)
return return
else: else:
# Handle sessions # Create a new request from a previous session
session = self.sessions[recv_req.session_id] session = self.sessions[recv_req.session_id]
req = session.create_req(recv_req, self.tokenizer) req = session.create_req(recv_req, self.tokenizer)
if isinstance(req.finished_reason, FINISH_ABORT): if isinstance(req.finished_reason, FINISH_ABORT):
......
...@@ -25,6 +25,7 @@ suites = { ...@@ -25,6 +25,7 @@ suites = {
"test_radix_attention.py", "test_radix_attention.py",
"test_retract_decode.py", "test_retract_decode.py",
"test_server_args.py", "test_server_args.py",
"test_session_control.py",
"test_skip_tokenizer_init.py", "test_skip_tokenizer_init.py",
"test_srt_engine.py", "test_srt_engine.py",
"test_srt_endpoint.py", "test_srt_endpoint.py",
...@@ -35,7 +36,6 @@ suites = { ...@@ -35,7 +36,6 @@ suites = {
"test_triton_attention_backend.py", "test_triton_attention_backend.py",
"test_update_weights.py", "test_update_weights.py",
"test_vision_openai_server.py", "test_vision_openai_server.py",
"test_session_control.py",
], ],
"sampling/penaltylib": glob.glob( "sampling/penaltylib": glob.glob(
"sampling/penaltylib/**/test_*.py", recursive=True "sampling/penaltylib/**/test_*.py", recursive=True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment