[Doc] Add stream flag for chat completion example (#18524)

Signed-off-by: calvin chen <120380290@qq.com>

[Doc] Add stream flag for chat completion example (#18524)
Signed-off-by: calvin chen <120380290@qq.com>
3f505233 · Calvin Chen · GitHub · 4e04eceb · 3f505233
Unverified commit 3f505233, authored May 22, 2025 by Calvin Chen; committed by GitHub on May 22, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 21 additions and 3 deletions

examples/online_serving/openai_chat_completion_client.py examples/online_serving/openai_chat_completion_client.py +21 -3

No files found.
--- a/examples/online_serving/openai_chat_completion_client.py
+++ b/examples/online_serving/openai_chat_completion_client.py
@@ -3,6 +3,9 @@
 NOTE: start a supported chat completion model server with `vllm serve`, e.g.
    vllm serve meta-llama/Llama-2-7b-chat-hf
 """
+
+import argparse
+
 from openai import OpenAI

 # Modify OpenAI's API key and API base to use vLLM's API server.
@@ -24,7 +27,15 @@ messages = [{
 }]


-def main():
+def parse_args():
+    parser = argparse.ArgumentParser(description="Client for vLLM API server")
+    parser.add_argument("--stream",
+                        action="store_true",
+                        help="Enable streaming response")
+    return parser.parse_args()
+
+
+def main(args):
    client = OpenAI(
        # defaults to os.environ.get("OPENAI_API_KEY")
        api_key=openai_api_key,
@@ -34,16 +45,23 @@ def main():
    models = client.models.list()
    model = models.data[0].id

+    # Chat Completion API
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
+        stream=args.stream,
    )

    print("-" * 50)
    print("Chat completion results:")
+    if args.stream:
+        for c in chat_completion:
+            print(c)
+    else:
-    print(chat_completion)
+        print(chat_completion)
    print("-" * 50)


 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)