Fix CI issue in the distributed 4-GPU test (#20204)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

Fix CI issue in the distributed 4-GPU test (#20204)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
d45417b8 · Wentao Ye · GitHub · a29e62ea · d45417b8
Unverified Commit d45417b8 authored Jun 28, 2025 by Wentao Ye Committed by GitHub Jun 27, 2025
Show whitespace changes
Inline Side-by-side

Showing with 18 additions and 0 deletions

examples/offline_inference/data_parallel.py examples/offline_inference/data_parallel.py +18 -0

No files found.
--- a/examples/offline_inference/data_parallel.py
+++ b/examples/offline_inference/data_parallel.py
@@ -64,6 +64,18 @@ def parse_args():
    parser.add_argument(
        "--trust-remote-code", action="store_true", help="Trust remote code."
    )
+    parser.add_argument(
+        "--max-num-seqs",
+        type=int,
+        default=64,
+        help=("Maximum number of sequences to be processed in a single iteration."),
+    )
+    parser.add_argument(
+        "--gpu-memory-utilization",
+        type=float,
+        default=0.8,
+        help=("Fraction of GPU memory vLLM is allowed to allocate (0.0, 1.0]."),
+    )
    return parser.parse_args()
@@ -77,6 +89,8 @@ def main(
    GPUs_per_dp_rank,
    enforce_eager,
    trust_remote_code,
+    max_num_seqs,
+    gpu_memory_utilization,
 ):
    os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
    os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
@@ -127,6 +141,8 @@ def main(
        enforce_eager=enforce_eager,
        enable_expert_parallel=True,
        trust_remote_code=trust_remote_code,
+        max_num_seqs=max_num_seqs,
+        gpu_memory_utilization=gpu_memory_utilization,
    )
    outputs = llm.generate(prompts, sampling_params)
    # Print the outputs.
@@ -181,6 +197,8 @@ if __name__ == "__main__":
                tp_size,
                args.enforce_eager,
                args.trust_remote_code,
+                args.max_num_seqs,
+                args.gpu_memory_utilization,
            ),
        )
        proc.start()