Unverified commit 35e076b3, authored by Reid, committed by GitHub
Browse files

[Misc] update api_client example (#16459)


Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
parent a26f59cc
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""Example Python client for `vllm.entrypoints.api_server` """Example Python client for `vllm.entrypoints.api_server`
Start the demo server:
python -m vllm.entrypoints.api_server --model <model_name>
NOTE: The API server is used only for demonstration and simple performance NOTE: The API server is used only for demonstration and simple performance
benchmarks. It is not intended for production use. benchmarks. It is not intended for production use.
For production use, we recommend `vllm serve` and the OpenAI client API. For production use, we recommend `vllm serve` and the OpenAI client API.
...@@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API. ...@@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API.
import argparse import argparse
import json import json
from argparse import Namespace
from collections.abc import Iterable from collections.abc import Iterable
import requests import requests
...@@ -27,7 +31,6 @@ def post_http_request(prompt: str, ...@@ -27,7 +31,6 @@ def post_http_request(prompt: str,
pload = { pload = {
"prompt": prompt, "prompt": prompt,
"n": n, "n": n,
"use_beam_search": True,
"temperature": 0.0, "temperature": 0.0,
"max_tokens": 16, "max_tokens": 16,
"stream": stream, "stream": stream,
...@@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]: ...@@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
return output return output
if __name__ == "__main__": def main(args: Namespace):
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="localhost")
parser.add_argument("--port", type=int, default=8000)
parser.add_argument("--n", type=int, default=4)
parser.add_argument("--prompt", type=str, default="San Francisco is a")
parser.add_argument("--stream", action="store_true")
args = parser.parse_args()
prompt = args.prompt prompt = args.prompt
api_url = f"http://{args.host}:{args.port}/generate" api_url = f"http://{args.host}:{args.port}/generate"
n = args.n n = args.n
...@@ -83,3 +79,14 @@ if __name__ == "__main__": ...@@ -83,3 +79,14 @@ if __name__ == "__main__":
output = get_response(response) output = get_response(response)
for i, line in enumerate(output): for i, line in enumerate(output):
print(f"Beam candidate {i}: {line!r}", flush=True) print(f"Beam candidate {i}: {line!r}", flush=True)
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the example client.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read them from ``sys.argv[1:]``.

    Returns:
        A namespace with the attributes ``host``, ``port``, ``n``,
        ``prompt`` and ``stream``.
    """
    parser = argparse.ArgumentParser(
        description="Example client for the vLLM demo API server.")
    parser.add_argument("--host",
                        type=str,
                        default="localhost",
                        help="host name of the API server")
    parser.add_argument("--port",
                        type=int,
                        default=8000,
                        help="port of the API server")
    parser.add_argument("--n",
                        type=int,
                        default=1,
                        help="number of completions to request "
                        "(the 'n' request field)")
    parser.add_argument("--prompt",
                        type=str,
                        default="San Francisco is a",
                        help="prompt text sent to the server")
    parser.add_argument("--stream",
                        action="store_true",
                        help="set the 'stream' request field to true")
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Keep the guard minimal: parse the CLI options and hand them to main().
    main(parse_args())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment