Unverified commit 9681225a, authored by Dmitry Tokarev, committed by GitHub
Browse files

chore: unskip several multi-gpu tests and enable sglang multi-gpu tests (#7443)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent a800515d
......@@ -222,7 +222,9 @@ jobs:
cpu_only_test_markers: 'pre_merge and sglang and gpu_0'
run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
single_gpu_test_markers: 'pre_merge and sglang and gpu_1'
run_multi_gpu_tests: false # all sglang multi-GPU tests are currently skipped; re-enable when fixed
run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
multi_gpu_test_markers: 'pre_merge and sglang and (gpu_2 or gpu_4)'
multi_gpu_test_timeout_minutes: 60
secrets: inherit
# ============================================================================
......
......@@ -33,7 +33,6 @@ pytestmark = [
pytest.mark.sglang,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
]
......@@ -187,6 +186,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1
@pytest.mark.skip(reason="DYN-2265")
@pytest.mark.nightly
def test_request_cancellation_sglang_aggregated(
request, runtime_services_dynamic_ports, predownload_models
):
......@@ -292,8 +292,9 @@ def test_request_cancellation_sglang_aggregated(
logger.info(f"{description} detected successfully")
@pytest.mark.timeout(185) # 3x average
@pytest.mark.timeout(300) # 3x average
@pytest.mark.gpu_2
@pytest.mark.pre_merge
def test_request_cancellation_sglang_decode_cancel(
request, runtime_services_dynamic_ports, predownload_models
):
......
......@@ -425,7 +425,9 @@ def test_router_decisions_sglang_multiple_workers(
@pytest.mark.pre_merge
@pytest.mark.parametrize("request_plane", ["tcp"], indirect=True)
@pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance)
@pytest.mark.skip(reason="DYN-2265")
@pytest.mark.skip(
reason="DYN-2265"
) # Currently fails, probably due to SGLang startup issues when multiple workers share the same GPU; re-enable when fixed
def test_router_decisions_sglang_dp(
request,
runtime_services_dynamic_ports,
......
......@@ -237,6 +237,12 @@ class MinioService:
f"Downloading LoRA {self.config.lora_repo} to {self._temp_download_dir}"
)
# Run with HF_HUB_OFFLINE unset so the download works even when
# the predownload_models fixture has already enabled offline mode.
# This only affects the subprocess env; the parent process is unchanged.
env = os.environ.copy()
env.pop("HF_HUB_OFFLINE", None)
result = subprocess.run(
[
"huggingface-cli",
......@@ -249,6 +255,7 @@ class MinioService:
],
capture_output=True,
text=True,
env=env,
)
if result.returncode != 0:
......
......@@ -76,7 +76,6 @@ sglang_configs = {
marks=[
pytest.mark.gpu_2,
pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-0.6B",
env={},
......@@ -127,7 +126,6 @@ sglang_configs = {
marks=[
pytest.mark.gpu_2,
pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-0.6B",
env={
......@@ -137,7 +135,7 @@ sglang_configs = {
request_payloads=[
chat_payload_default(
expected_log=[
r"ZMQ listener .* received batch with \d+ events \(seq=\d+(?:, [^)]*)?\)",
r"ZMQ listener .* received batch with \d+ events \(engine_seq=\d+(?:, [^)]*)?\)",
r"Event processor for worker_id \d+ processing event: Stored\(",
r"Selected worker: worker_type=\w+, worker_id=\d+ dp_rank=.*?, logit: ",
]
......
......@@ -904,7 +904,6 @@ def test_lora_aggregated(
@pytest.mark.timeout(600)
@pytest.mark.pre_merge
@pytest.mark.parametrize("num_system_ports", [2], indirect=True)
@pytest.mark.skip(reason="DYN-2265")
def test_lora_aggregated_router(
request,
runtime_services_dynamic_ports,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment