Unverified commit e0d6b4a8, authored by Iceber Gu and committed by GitHub
Browse files

[CLI] add --max-tokens to `vllm complete` (#28109)


Signed-off-by: Iceber Gu <caiwei95@hotmail.com>
parent 72b1c2ae
...@@ -195,10 +195,15 @@ class CompleteCommand(CLISubcommand): ...@@ -195,10 +195,15 @@ class CompleteCommand(CLISubcommand):
def cmd(args: argparse.Namespace) -> None: def cmd(args: argparse.Namespace) -> None:
model_name, client = _interactive_cli(args) model_name, client = _interactive_cli(args)
kwargs = {
"model": model_name,
"stream": True,
}
if args.max_tokens:
kwargs["max_tokens"] = args.max_tokens
if args.quick: if args.quick:
stream = client.completions.create( stream = client.completions.create(prompt=args.quick, **kwargs)
model=model_name, prompt=args.quick, stream=True
)
_print_completion_stream(stream) _print_completion_stream(stream)
return return
...@@ -208,15 +213,18 @@ class CompleteCommand(CLISubcommand): ...@@ -208,15 +213,18 @@ class CompleteCommand(CLISubcommand):
input_prompt = input("> ") input_prompt = input("> ")
except EOFError: except EOFError:
break break
stream = client.completions.create( stream = client.completions.create(prompt=input_prompt, **kwargs)
model=model_name, prompt=input_prompt, stream=True
)
_print_completion_stream(stream) _print_completion_stream(stream)
@staticmethod @staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"""Add CLI arguments for the complete command.""" """Add CLI arguments for the complete command."""
_add_query_options(parser) _add_query_options(parser)
parser.add_argument(
"--max-tokens",
type=int,
help="Maximum number of tokens to generate per output sequence.",
)
parser.add_argument( parser.add_argument(
"-q", "-q",
"--quick", "--quick",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment