Unverified commit 9681225a, authored by Dmitry Tokarev, committed by GitHub
Browse files

chore: unskip several multi-gpu tests and enable sglang multi-gpu tests (#7443)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent a800515d
...@@ -222,7 +222,9 @@ jobs: ...@@ -222,7 +222,9 @@ jobs:
cpu_only_test_markers: 'pre_merge and sglang and gpu_0' cpu_only_test_markers: 'pre_merge and sglang and gpu_0'
run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }} run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
single_gpu_test_markers: 'pre_merge and sglang and gpu_1' single_gpu_test_markers: 'pre_merge and sglang and gpu_1'
run_multi_gpu_tests: false # all sglang multi-GPU tests are currently skipped; re-enable when fixed run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
multi_gpu_test_markers: 'pre_merge and sglang and (gpu_2 or gpu_4)'
multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================ # ============================================================================
......
...@@ -33,7 +33,6 @@ pytestmark = [ ...@@ -33,7 +33,6 @@ pytestmark = [
pytest.mark.sglang, pytest.mark.sglang,
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME), pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True), pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
] ]
...@@ -187,6 +186,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -187,6 +186,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(160) # 3x average @pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.skip(reason="DYN-2265") @pytest.mark.skip(reason="DYN-2265")
@pytest.mark.nightly
def test_request_cancellation_sglang_aggregated( def test_request_cancellation_sglang_aggregated(
request, runtime_services_dynamic_ports, predownload_models request, runtime_services_dynamic_ports, predownload_models
): ):
...@@ -292,8 +292,9 @@ def test_request_cancellation_sglang_aggregated( ...@@ -292,8 +292,9 @@ def test_request_cancellation_sglang_aggregated(
logger.info(f"{description} detected successfully") logger.info(f"{description} detected successfully")
@pytest.mark.timeout(185) # 3x average @pytest.mark.timeout(300) # 3x average
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.pre_merge
def test_request_cancellation_sglang_decode_cancel( def test_request_cancellation_sglang_decode_cancel(
request, runtime_services_dynamic_ports, predownload_models request, runtime_services_dynamic_ports, predownload_models
): ):
......
...@@ -425,7 +425,9 @@ def test_router_decisions_sglang_multiple_workers( ...@@ -425,7 +425,9 @@ def test_router_decisions_sglang_multiple_workers(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parametrize("request_plane", ["tcp"], indirect=True) @pytest.mark.parametrize("request_plane", ["tcp"], indirect=True)
@pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance) @pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance)
@pytest.mark.skip(reason="DYN-2265") @pytest.mark.skip(
reason="DYN-2265"
) # Currently fails probably due to SGLang startup issues when multiple workers on same GPU; re-enable when fixed
def test_router_decisions_sglang_dp( def test_router_decisions_sglang_dp(
request, request,
runtime_services_dynamic_ports, runtime_services_dynamic_ports,
......
...@@ -237,6 +237,12 @@ class MinioService: ...@@ -237,6 +237,12 @@ class MinioService:
f"Downloading LoRA {self.config.lora_repo} to {self._temp_download_dir}" f"Downloading LoRA {self.config.lora_repo} to {self._temp_download_dir}"
) )
# Run with HF_HUB_OFFLINE unset so the download works even when
# the predownload_models fixture has already enabled offline mode.
# This only affects the subprocess env; the parent process is unchanged.
env = os.environ.copy()
env.pop("HF_HUB_OFFLINE", None)
result = subprocess.run( result = subprocess.run(
[ [
"huggingface-cli", "huggingface-cli",
...@@ -249,6 +255,7 @@ class MinioService: ...@@ -249,6 +255,7 @@ class MinioService:
], ],
capture_output=True, capture_output=True,
text=True, text=True,
env=env,
) )
if result.returncode != 0: if result.returncode != 0:
......
...@@ -76,7 +76,6 @@ sglang_configs = { ...@@ -76,7 +76,6 @@ sglang_configs = {
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
...@@ -127,7 +126,6 @@ sglang_configs = { ...@@ -127,7 +126,6 @@ sglang_configs = {
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={ env={
...@@ -137,7 +135,7 @@ sglang_configs = { ...@@ -137,7 +135,7 @@ sglang_configs = {
request_payloads=[ request_payloads=[
chat_payload_default( chat_payload_default(
expected_log=[ expected_log=[
r"ZMQ listener .* received batch with \d+ events \(seq=\d+(?:, [^)]*)?\)", r"ZMQ listener .* received batch with \d+ events \(engine_seq=\d+(?:, [^)]*)?\)",
r"Event processor for worker_id \d+ processing event: Stored\(", r"Event processor for worker_id \d+ processing event: Stored\(",
r"Selected worker: worker_type=\w+, worker_id=\d+ dp_rank=.*?, logit: ", r"Selected worker: worker_type=\w+, worker_id=\d+ dp_rank=.*?, logit: ",
] ]
......
...@@ -904,7 +904,6 @@ def test_lora_aggregated( ...@@ -904,7 +904,6 @@ def test_lora_aggregated(
@pytest.mark.timeout(600) @pytest.mark.timeout(600)
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parametrize("num_system_ports", [2], indirect=True) @pytest.mark.parametrize("num_system_ports", [2], indirect=True)
@pytest.mark.skip(reason="DYN-2265")
def test_lora_aggregated_router( def test_lora_aggregated_router(
request, request,
runtime_services_dynamic_ports, runtime_services_dynamic_ports,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment