Commit 746d9b40 authored by zhuwenwen
Browse files

update arg_utils.py

parent fcfc474d
@@ -184,6 +184,7 @@ class EngineArgs:
logits_processor_pattern: Optional[str] = None logits_processor_pattern: Optional[str] = None
speculative_config: Optional[Dict[str, Any]] = None speculative_config: Optional[Dict[str, Any]] = None
num_speculative_heads: Optional[int] = None
qlora_adapter_name_or_path: Optional[str] = None qlora_adapter_name_or_path: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = None show_hidden_metrics_for_version: Optional[str] = None
@@ -793,6 +794,12 @@ class EngineArgs:
default=None, default=None,
help='The configurations for speculative decoding.' help='The configurations for speculative decoding.'
' Should be a JSON string.') ' Should be a JSON string.')
parser.add_argument(
'--num-speculative-heads',
type=int,
default=EngineArgs.num_speculative_heads,
help='The number of speculative heads to sample from '
'the draft model in speculative decoding.')
parser.add_argument('--model-loader-extra-config', parser.add_argument('--model-loader-extra-config',
type=nullable_str, type=nullable_str,
default=EngineArgs.model_loader_extra_config, default=EngineArgs.model_loader_extra_config,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment