Commit 746d9b40 authored by zhuwenwen
Browse files

update arg_utils.py

parent fcfc474d
...@@ -184,6 +184,7 @@ class EngineArgs: ...@@ -184,6 +184,7 @@ class EngineArgs:
logits_processor_pattern: Optional[str] = None logits_processor_pattern: Optional[str] = None
speculative_config: Optional[Dict[str, Any]] = None speculative_config: Optional[Dict[str, Any]] = None
num_speculative_heads: Optional[int] = None
qlora_adapter_name_or_path: Optional[str] = None qlora_adapter_name_or_path: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = None show_hidden_metrics_for_version: Optional[str] = None
...@@ -793,6 +794,12 @@ class EngineArgs: ...@@ -793,6 +794,12 @@ class EngineArgs:
default=None, default=None,
help='The configurations for speculative decoding.' help='The configurations for speculative decoding.'
' Should be a JSON string.') ' Should be a JSON string.')
parser.add_argument(
'--num-speculative-heads',
type=int,
default=EngineArgs.num_speculative_heads,
help='The number of speculative heads to sample from '
'the draft model in speculative decoding.')
parser.add_argument('--model-loader-extra-config', parser.add_argument('--model-loader-extra-config',
type=nullable_str, type=nullable_str,
default=EngineArgs.model_loader_extra_config, default=EngineArgs.model_loader_extra_config,
...@@ -1745,4 +1752,4 @@ def _engine_args_parser(): ...@@ -1745,4 +1752,4 @@ def _engine_args_parser():
def _async_engine_args_parser(): def _async_engine_args_parser():
return AsyncEngineArgs.add_cli_args(FlexibleArgumentParser(), return AsyncEngineArgs.add_cli_args(FlexibleArgumentParser(),
async_args_only=True) async_args_only=True)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment