Convert `examples` to `ruff-format` (#18400)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Convert `examples` to `ruff-format` (#18400)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
27bebcd8 · Harry Mellor · GitHub · e7523c2e · 27bebcd8 · 27bebcd8
Unverified commit 27bebcd8, authored May 26, 2025 by Harry Mellor; committed by GitHub on May 26, 2025.
20 changed files
--- a/examples/offline_inference/torchrun_example.py
+++ b/examples/offline_inference/torchrun_example.py
@@ -45,8 +45,7 @@ if dist.get_rank() == 0:
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}\n"
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n")
-              f"Generated text: {generated_text!r}\n")
        print("-" * 50)
    """
 Further tips:

--- a/examples/offline_inference/tpu.py
+++ b/examples/offline_inference/tpu.py
@@ -20,10 +20,12 @@ sampling_params = SamplingParams(temperature=0, top_p=1.0, n=N, max_tokens=16)
 def main():
    # Set `enforce_eager=True` to avoid ahead-of-time compilation.
    # In real workloads, `enforce_eager` should be `False`.
-    llm = LLM(model="Qwen/Qwen2-1.5B-Instruct",
+    llm = LLM(
-              max_num_batched_tokens=64,
+        model="Qwen/Qwen2-1.5B-Instruct",
-              max_num_seqs=4,
+        max_num_batched_tokens=64,
-              max_model_len=128)
+        max_num_seqs=4,
+        max_model_len=128,
+    )
    outputs = llm.generate(prompts, sampling_params)
    print("-" * 50)
    for output, answer in zip(outputs, answers):

--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
--- a/examples/offline_inference/vision_language_embedding.py
+++ b/examples/offline_inference/vision_language_embedding.py
@@ -6,6 +6,7 @@ the correct prompt format on vision language models for multimodal embedding.
 For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
 from argparse import Namespace
 from dataclasses import asdict
 from typing import Literal, NamedTuple, Optional, TypedDict, Union, get_args
@@ -44,19 +45,17 @@ class ModelRequestData(NamedTuple):
 def run_e5_v(query: Query) -> ModelRequestData:
-    llama3_template = '<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n'  # noqa: E501
+    llama3_template = "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n"  # noqa: E501
    if query["modality"] == "text":
        text = query["text"]
-        prompt = llama3_template.format(
+        prompt = llama3_template.format(f"{text}\nSummary above sentence in one word: ")
-            f"{text}\nSummary above sentence in one word: ")
        image = None
    elif query["modality"] == "image":
-        prompt = llama3_template.format(
+        prompt = llama3_template.format("<image>\nSummary above image in one word: ")
-            "<image>\nSummary above image in one word: ")
        image = query["image"]
    else:
-        modality = query['modality']
+        modality = query["modality"]
        raise ValueError(f"Unsupported query modality: '{modality}'")
    engine_args = EngineArgs(
@@ -83,10 +82,12 @@ def run_vlm2vec(query: Query) -> ModelRequestData:
        image = query["image"]
    elif query["modality"] == "text+image":
        text = query["text"]
-        prompt = f"<|image_1|> Represent the given image with the following question: {text}"  # noqa: E501
+        prompt = (
+            f"<|image_1|> Represent the given image with the following question: {text}"  # noqa: E501
+        )
        image = query["image"]
    else:
-        modality = query['modality']
+        modality = query["modality"]
        raise ValueError(f"Unsupported query modality: '{modality}'")
    engine_args = EngineArgs(
@@ -136,7 +137,8 @@ def run_encode(model: str, modality: QueryModality, seed: Optional[int]):
    # Disable other modalities to save memory
    default_limits = {"image": 0, "video": 0, "audio": 0}
    req_data.engine_args.limit_mm_per_prompt = default_limits | dict(
-        req_data.engine_args.limit_mm_per_prompt or {})
+        req_data.engine_args.limit_mm_per_prompt or {}
+    )
    engine_args = asdict(req_data.engine_args) | {"seed": seed}
    llm = LLM(**engine_args)
@@ -145,10 +147,12 @@ def run_encode(model: str, modality: QueryModality, seed: Optional[int]):
    if req_data.image is not None:
        mm_data["image"] = req_data.image
-    outputs = llm.embed({
+    outputs = llm.embed(
-        "prompt": req_data.prompt,
+        {
-        "multi_modal_data": mm_data,
+            "prompt": req_data.prompt,
-    })
+            "multi_modal_data": mm_data,
+        }
+    )
    print("-" * 50)
    for output in outputs:
@@ -164,23 +168,30 @@ model_example_map = {
 def parse_args():
    parser = FlexibleArgumentParser(
-        description='Demo on using vLLM for offline inference with '
+        description="Demo on using vLLM for offline inference with "
-        'vision language models for multimodal embedding')
+        "vision language models for multimodal embedding"
-    parser.add_argument('--model-name',
+    )
-                        '-m',
+    parser.add_argument(
-                        type=str,
+        "--model-name",
-                        default="vlm2vec",
+        "-m",
-                        choices=model_example_map.keys(),
+        type=str,
-                        help='The name of the embedding model.')
+        default="vlm2vec",
-    parser.add_argument('--modality',
+        choices=model_example_map.keys(),
-                        type=str,
+        help="The name of the embedding model.",
-                        default="image",
+    )
-                        choices=get_args(QueryModality),
+    parser.add_argument(
-                        help='Modality of the input.')
+        "--modality",
-    parser.add_argument("--seed",
+        type=str,
-                        type=int,
+        default="image",
-                        default=None,
+        choices=get_args(QueryModality),
-                        help="Set the seed when initializing `vllm.LLM`.")
+        help="Modality of the input.",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Set the seed when initializing `vllm.LLM`.",
+    )
    return parser.parse_args()

--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
--- a/examples/online_serving/api_client.py
+++ b/examples/online_serving/api_client.py
@@ -17,16 +17,15 @@ import requests
 def clear_line(n: int = 1) -> None:
-    LINE_UP = '\033[1A'
+    LINE_UP = "\033[1A"
-    LINE_CLEAR = '\x1b[2K'
+    LINE_CLEAR = "\x1b[2K"
    for _ in range(n):
        print(LINE_UP, end=LINE_CLEAR, flush=True)
-def post_http_request(prompt: str,
+def post_http_request(
-                      api_url: str,
+    prompt: str, api_url: str, n: int = 1, stream: bool = False
-                      n: int = 1,
+) -> requests.Response:
-                      stream: bool = False) -> requests.Response:
    headers = {"User-Agent": "Test Client"}
    pload = {
        "prompt": prompt,
@@ -35,17 +34,14 @@ def post_http_request(prompt: str,
        "max_tokens": 16,
        "stream": stream,
    }
-    response = requests.post(api_url,
+    response = requests.post(api_url, headers=headers, json=pload, stream=stream)
-                             headers=headers,
-                             json=pload,
-                             stream=stream)
    return response
 def get_streaming_response(response: requests.Response) -> Iterable[list[str]]:
-    for chunk in response.iter_lines(chunk_size=8192,
+    for chunk in response.iter_lines(
-                                     decode_unicode=False,
+        chunk_size=8192, decode_unicode=False, delimiter=b"\n"
-                                     delimiter=b"\n"):
+    ):
        if chunk:
            data = json.loads(chunk.decode("utf-8"))
            output = data["text"]

--- a/examples/online_serving/cohere_rerank_client.py
+++ b/examples/online_serving/cohere_rerank_client.py
@@ -6,6 +6,7 @@ Note that `pip install cohere` is needed to run this example.
 run: vllm serve BAAI/bge-reranker-base
 """
 from typing import Union
 import cohere
@@ -16,28 +17,28 @@ model = "BAAI/bge-reranker-base"
 query = "What is the capital of France?"
 documents = [
-    "The capital of France is Paris", "Reranking is fun!",
+    "The capital of France is Paris",
-    "vLLM is an open-source framework for fast AI serving"
+    "Reranking is fun!",
+    "vLLM is an open-source framework for fast AI serving",
 ]
-def cohere_rerank(client: Union[Client, ClientV2], model: str, query: str,
+def cohere_rerank(
-                  documents: list[str]) -> dict:
+    client: Union[Client, ClientV2], model: str, query: str, documents: list[str]
+) -> dict:
    return client.rerank(model=model, query=query, documents=documents)
 def main():
    # cohere v1 client
-    cohere_v1 = cohere.Client(base_url="http://localhost:8000",
+    cohere_v1 = cohere.Client(base_url="http://localhost:8000", api_key="sk-fake-key")
-                              api_key="sk-fake-key")
    rerank_v1_result = cohere_rerank(cohere_v1, model, query, documents)
    print("-" * 50)
    print("rerank_v1_result:\n", rerank_v1_result)
    print("-" * 50)
    # or the v2
-    cohere_v2 = cohere.ClientV2("sk-fake-key",
+    cohere_v2 = cohere.ClientV2("sk-fake-key", base_url="http://localhost:8000")
-                                base_url="http://localhost:8000")
    rerank_v2_result = cohere_rerank(cohere_v2, model, query, documents)
    print("rerank_v2_result:\n", rerank_v2_result)
    print("-" * 50)

--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
--- a/examples/online_serving/gradio_openai_chatbot_webserver.py
+++ b/examples/online_serving/gradio_openai_chatbot_webserver.py
--- a/examples/online_serving/gradio_webserver.py
+++ b/examples/online_serving/gradio_webserver.py
--- a/examples/online_serving/jinaai_rerank_client.py
+++ b/examples/online_serving/jinaai_rerank_client.py
--- a/examples/online_serving/kv_events_subscriber.py
+++ b/examples/online_serving/kv_events_subscriber.py
--- a/examples/online_serving/openai_chat_completion_client.py
+++ b/examples/online_serving/openai_chat_completion_client.py
--- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py
+++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py
--- a/examples/online_serving/openai_chat_completion_client_with_tools.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools.py
--- a/examples/online_serving/openai_chat_completion_client_with_tools_required.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools_required.py
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
--- a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
--- a/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
--- a/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py