"vllm/vscode:/vscode.git/clone" did not exist on "42ffba11ad4597289b5ae609900a74a153fbd067"
Unverified commit 6fa6e7ef, authored by Micah Williamson, committed by GitHub
Browse files

[ROCm][CI] Disable Async Scheduling For Qwen3-Next-80B-A3B-Instruct MTP Async...


[ROCm][CI] Disable Async Scheduling For Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy Test (#32275)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
parent 90c08369
...@@ -18,15 +18,18 @@ wait_for_server() { ...@@ -18,15 +18,18 @@ wait_for_server() {
MODEL="Qwen/Qwen3-Next-80B-A3B-Instruct" MODEL="Qwen/Qwen3-Next-80B-A3B-Instruct"
# Set BACKENDS based on platform # Set BACKENDS and platform-specific args based on platform
if command -v rocm-smi &> /dev/null || [[ -d /opt/rocm ]] || [[ -n "${ROCM_PATH:-}" ]]; then if command -v rocm-smi &> /dev/null || [[ -d /opt/rocm ]] || [[ -n "${ROCM_PATH:-}" ]]; then
# ROCm platform # ROCm platform
BACKENDS=("allgather_reducescatter") BACKENDS=("allgather_reducescatter")
# Disable MOE padding for ROCm since it is causing eplb to fail # Disable MOE padding for ROCm since it is causing eplb to fail
export VLLM_ROCM_MOE_PADDING=0 export VLLM_ROCM_MOE_PADDING=0
PLATFORM_ARGS=("--no-async-scheduling")
echo "Disabled async scheduling for ROCm platform due to issues with spec decode."
else else
# Non-ROCm platform (CUDA/other) # Non-ROCm platform (CUDA/other)
BACKENDS=("deepep_high_throughput" "deepep_low_latency") BACKENDS=("deepep_high_throughput" "deepep_low_latency")
PLATFORM_ARGS=()
fi fi
cleanup() { cleanup() {
...@@ -54,6 +57,7 @@ for BACK in "${BACKENDS[@]}"; do ...@@ -54,6 +57,7 @@ for BACK in "${BACKENDS[@]}"; do
--trust-remote-code \ --trust-remote-code \
--max-model-len 2048 \ --max-model-len 2048 \
--gpu-memory-utilization 0.9 \ --gpu-memory-utilization 0.9 \
"${PLATFORM_ARGS[@]}" \
--port $PORT & --port $PORT &
SERVER_PID=$! SERVER_PID=$!
wait_for_server $PORT wait_for_server $PORT
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment