[Doc] Support --stream arg in openai_completion_client.py script (#18388)

Signed-off-by: googs1025 <googs1025@gmail.com>

[Doc] Support --stream arg in openai_completion_client.py script (#18388)
Signed-off-by: googs1025 <googs1025@gmail.com>
71075029 · CYJiang · GitHub · ca86a7cf · 71075029 · 71075029
Unverified commit 71075029, authored May 22, 2025 by CYJiang; committed by GitHub on May 22, 2025.
3 changed files
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -12,6 +12,9 @@ from enum import Enum
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
 # Guided decoding by Choice (list of possible options)
 def guided_choice_completion(client: OpenAI, model: str):
@@ -134,8 +137,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
 def main():
    client: OpenAI = OpenAI(
-        base_url="http://localhost:8000/v1",
+        base_url=openai_api_base,
-        api_key="-",
+        api_key=openai_api_key,
    )
    model = client.models.list().data[0].id

--- a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
@@ -7,11 +7,14 @@ from openai import OpenAI
 # to enforce the format of a tool call response, but it could be used for
 # any structured output within a subset of the response.
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
 def main():
    client = OpenAI(
-        base_url="http://localhost:8000/v1",
+        base_url=openai_api_base,
-        api_key="-",
+        api_key=openai_api_key,
    )
    messages = [{

--- a/examples/online_serving/openai_completion_client.py
+++ b/examples/online_serving/openai_completion_client.py
 # SPDX-License-Identifier: Apache-2.0
+import argparse
 from openai import OpenAI
 # Modify OpenAI's API key and API base to use vLLM's API server.
@@ -7,7 +9,15 @@ openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
-def main():
+def parse_args():
+    parser = argparse.ArgumentParser(description="Client for vLLM API server")
+    parser.add_argument("--stream",
+                        action="store_true",
+                        help="Enable streaming response")
+    return parser.parse_args()
+def main(args):
    client = OpenAI(
        # defaults to os.environ.get("OPENAI_API_KEY")
        api_key=openai_api_key,
@@ -18,18 +28,17 @@ def main():
    model = models.data[0].id
    # Completion API
-    stream = False
    completion = client.completions.create(
        model=model,
        prompt="A robot may not injure a human being",
        echo=False,
        n=2,
-        stream=stream,
+        stream=args.stream,
        logprobs=3)
    print("-" * 50)
    print("Completion results:")
-    if stream:
+    if args.stream:
        for c in completion:
            print(c)
    else:
@@ -38,4 +47,5 @@ def main():
 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)