Commit c0f0b209 authored by zhuwenwen
Browse files

remove redundant SpeculativeConfig

parent 11770f88
......@@ -766,23 +766,6 @@ class EngineArgs:
lora_group.add_argument("--default-mm-loras",
**lora_kwargs["default_mm_loras"])
# Speculative arguments
speculative_group = parser.add_argument_group(
title="SpeculativeConfig",
description=SpeculativeConfig.__doc__,
)
speculative_group.add_argument(
"--speculative-config",
type=json.loads,
default=None,
help="The configurations for speculative decoding. Should be a "
"JSON string.")
parser.add_argument(
'--num-speculative-heads',
type=int,
default=EngineArgs.num_speculative_heads,
help='The number of speculative heads to sample from '
'the draft model in speculative decoding.')
# Observability arguments
observability_kwargs = get_kwargs(ObservabilityConfig)
......@@ -874,6 +857,13 @@ class EngineArgs:
**vllm_kwargs["compilation_config"])
vllm_group.add_argument("--additional-config",
**vllm_kwargs["additional_config"])
parser.add_argument(
'--num-speculative-heads',
type=int,
default=EngineArgs.num_speculative_heads,
help='The number of speculative heads to sample from '
'the draft model in speculative decoding.')
# Other arguments
parser.add_argument('--disable-log-stats',
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment