[Misc] update api_client example (#16459)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>

[Misc] update api_client example (#16459)
Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
35e076b3 · Reid · GitHub · a26f59cc · 35e076b3
Unverified Commit 35e076b3 authored Apr 11, 2025 by Reid Committed by GitHub Apr 11, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 9 deletions

examples/online_serving/api_client.py examples/online_serving/api_client.py +16 -9

No files found.
--- a/examples/online_serving/api_client.py
+++ b/examples/online_serving/api_client.py
 # SPDX-License-Identifier: Apache-2.0
 """Example Python client for `vllm.entrypoints.api_server`
+Start the demo server:
+    python -m vllm.entrypoints.api_server --model <model_name>
 NOTE: The API server is used only for demonstration and simple performance
 benchmarks. It is not intended for production use.
 For production use, we recommend `vllm serve` and the OpenAI client API.
@@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API.
 import argparse
 import json
+from argparse import Namespace
 from collections.abc import Iterable
 import requests
@@ -27,7 +31,6 @@ def post_http_request(prompt: str,
    pload = {
        "prompt": prompt,
        "n": n,
-        "use_beam_search": True,
        "temperature": 0.0,
        "max_tokens": 16,
        "stream": stream,
@@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
    return output
-if __name__ == "__main__":
+def main(args: Namespace):
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", type=str, default="localhost")
-    parser.add_argument("--port", type=int, default=8000)
-    parser.add_argument("--n", type=int, default=4)
-    parser.add_argument("--prompt", type=str, default="San Francisco is a")
-    parser.add_argument("--stream", action="store_true")
-    args = parser.parse_args()
    prompt = args.prompt
    api_url = f"http://{args.host}:{args.port}/generate"
    n = args.n
@@ -83,3 +79,14 @@ if __name__ == "__main__":
        output = get_response(response)
        for i, line in enumerate(output):
            print(f"Beam candidate {i}: {line!r}", flush=True)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--port", type=int, default=8000)
+    parser.add_argument("--n", type=int, default=1)
+    parser.add_argument("--prompt", type=str, default="San Francisco is a")
+    parser.add_argument("--stream", action="store_true")
+    args = parser.parse_args()
+    main(args)