Unverified commit e0d6b4a8, authored by Iceber Gu and committed by GitHub
Browse files

[CLI] add --max-tokens to `vllm complete` (#28109)


Signed-off-by: Iceber Gu <caiwei95@hotmail.com>
parent 72b1c2ae
......@@ -195,10 +195,15 @@ class CompleteCommand(CLISubcommand):
def cmd(args: argparse.Namespace) -> None:
model_name, client = _interactive_cli(args)
kwargs = {
"model": model_name,
"stream": True,
}
if args.max_tokens:
kwargs["max_tokens"] = args.max_tokens
if args.quick:
stream = client.completions.create(
model=model_name, prompt=args.quick, stream=True
)
stream = client.completions.create(prompt=args.quick, **kwargs)
_print_completion_stream(stream)
return
......@@ -208,15 +213,18 @@ class CompleteCommand(CLISubcommand):
input_prompt = input("> ")
except EOFError:
break
stream = client.completions.create(
model=model_name, prompt=input_prompt, stream=True
)
stream = client.completions.create(prompt=input_prompt, **kwargs)
_print_completion_stream(stream)
@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"""Add CLI arguments for the complete command."""
_add_query_options(parser)
parser.add_argument(
"--max-tokens",
type=int,
help="Maximum number of tokens to generate per output sequence.",
)
parser.add_argument(
"-q",
"--quick",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment