"vscode:/vscode.git/clone" did not exist on "a1e1806575f8f38c3603c7efa99cc17c7f97bdcc"
Unverified commit 27d71045, authored by Lianmin Zheng and committed by GitHub
Browse files

[Auto Sync] Update scheduler.py, server_args.py (20251014) (#11623)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
parent c224a4c6
...@@ -658,6 +658,12 @@ class Scheduler( ...@@ -658,6 +658,12 @@ class Scheduler(
def launch_draft_worker( def launch_draft_worker(
self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank self, gpu_id, tp_rank, moe_ep_rank, server_args, port_args, dp_rank
): ):
if server_args.speculative_draft_load_format is not None:
server_args.load_format = server_args.speculative_draft_load_format
logger.info(
f"Using draft model load_format: '{server_args.speculative_draft_load_format}'"
)
if self.spec_algorithm.is_eagle(): if self.spec_algorithm.is_eagle():
from sglang.srt.speculative.eagle_worker import EAGLEWorker from sglang.srt.speculative.eagle_worker import EAGLEWorker
from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2 from sglang.srt.speculative.eagle_worker_v2 import EAGLEWorkerV2
......
...@@ -325,6 +325,7 @@ class ServerArgs: ...@@ -325,6 +325,7 @@ class ServerArgs:
speculative_algorithm: Optional[str] = None speculative_algorithm: Optional[str] = None
speculative_draft_model_path: Optional[str] = None speculative_draft_model_path: Optional[str] = None
speculative_draft_model_revision: Optional[str] = None speculative_draft_model_revision: Optional[str] = None
speculative_draft_load_format: Optional[str] = None
speculative_num_steps: Optional[int] = None speculative_num_steps: Optional[int] = None
speculative_eagle_topk: Optional[int] = None speculative_eagle_topk: Optional[int] = None
speculative_num_draft_tokens: Optional[int] = None speculative_num_draft_tokens: Optional[int] = None
...@@ -2223,6 +2224,15 @@ class ServerArgs: ...@@ -2223,6 +2224,15 @@ class ServerArgs:
"name, a tag name, or a commit id. If unspecified, will use " "name, a tag name, or a commit id. If unspecified, will use "
"the default version.", "the default version.",
) )
parser.add_argument(
"--speculative-draft-load-format",
type=str,
default=ServerArgs.speculative_draft_load_format,
choices=LOAD_FORMAT_CHOICES,
help="The format of the draft model weights to load. "
"If not specified, will use the same format as --load-format. "
"Use 'dummy' to initialize draft model weights with random values for profiling.",
)
parser.add_argument( parser.add_argument(
"--speculative-num-steps", "--speculative-num-steps",
type=int, type=int,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment