Unverified commit d45417b8, authored by Wentao Ye, committed by GitHub
Browse files

Fix CI issue in the distributed 4-GPU test (#20204)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent a29e62ea
...@@ -64,6 +64,18 @@ def parse_args(): ...@@ -64,6 +64,18 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--trust-remote-code", action="store_true", help="Trust remote code." "--trust-remote-code", action="store_true", help="Trust remote code."
) )
parser.add_argument(
"--max-num-seqs",
type=int,
default=64,
help=("Maximum number of sequences to be processed in a single iteration."),
)
parser.add_argument(
"--gpu-memory-utilization",
type=float,
default=0.8,
help=("Fraction of GPU memory vLLM is allowed to allocate (0.0, 1.0]."),
)
return parser.parse_args() return parser.parse_args()
...@@ -77,6 +89,8 @@ def main( ...@@ -77,6 +89,8 @@ def main(
GPUs_per_dp_rank, GPUs_per_dp_rank,
enforce_eager, enforce_eager,
trust_remote_code, trust_remote_code,
max_num_seqs,
gpu_memory_utilization,
): ):
os.environ["VLLM_DP_RANK"] = str(global_dp_rank) os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank) os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
...@@ -127,6 +141,8 @@ def main( ...@@ -127,6 +141,8 @@ def main(
enforce_eager=enforce_eager, enforce_eager=enforce_eager,
enable_expert_parallel=True, enable_expert_parallel=True,
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,
max_num_seqs=max_num_seqs,
gpu_memory_utilization=gpu_memory_utilization,
) )
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
# Print the outputs. # Print the outputs.
...@@ -181,6 +197,8 @@ if __name__ == "__main__": ...@@ -181,6 +197,8 @@ if __name__ == "__main__":
tp_size, tp_size,
args.enforce_eager, args.enforce_eager,
args.trust_remote_code, args.trust_remote_code,
args.max_num_seqs,
args.gpu_memory_utilization,
), ),
) )
proc.start() proc.start()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment