"vscode:/vscode.git/clone" did not exist on "76ddeff2931d1a5bc4192815c6ed778541e9f59e"
Unverified commit 5b4b42c0, authored by Doug Smith, committed by GitHub
Browse files

Mark DBO test as flaky on b200 for Distributed B200 test (#29913)


Signed-off-by: dougbtv <dosmith@redhat.com>
parent cc050558
...@@ -9,10 +9,22 @@ correctly with the DeepSeek-V2-Lite model using GSM8K evaluation. ...@@ -9,10 +9,22 @@ correctly with the DeepSeek-V2-Lite model using GSM8K evaluation.
""" """
import pytest import pytest
import torch
from tests.evals.gsm8k.gsm8k_eval import evaluate_gsm8k from tests.evals.gsm8k.gsm8k_eval import evaluate_gsm8k
from tests.utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
# Detect Blackwell-class GPUs such as B200: any device whose compute
# capability major version is 10 or higher. IS_BLACKWELL is evaluated once at
# import time and feeds the xfail marker on the DBO GSM8K test.
IS_BLACKWELL = False
try:
    if torch.cuda.is_available():
        # get_device_capability returns a (major, minor) tuple; only the
        # major version of device 0 is inspected.
        cap = torch.cuda.get_device_capability(0)
        IS_BLACKWELL = cap[0] >= 10
except Exception:
    # Be conservative: if we can't detect, don't xfail by default
    IS_BLACKWELL = False
MODEL_NAME = "deepseek-ai/DeepSeek-V2-Lite-Chat" MODEL_NAME = "deepseek-ai/DeepSeek-V2-Lite-Chat"
DP_SIZE = 2 DP_SIZE = 2
...@@ -33,6 +45,13 @@ DEEPEP_BACKENDS = [ ...@@ -33,6 +45,13 @@ DEEPEP_BACKENDS = [
@pytest.mark.parametrize("all2all_backend", DEEPEP_BACKENDS) @pytest.mark.parametrize("all2all_backend", DEEPEP_BACKENDS)
@pytest.mark.xfail(
IS_BLACKWELL,
reason=(
"Temporary: DBO accuracy unstable on Blackwell "
"(doesn't meet expectation of MIN_ACCURACY = 0.62)"
),
)
def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available): def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available):
""" """
Test DBO with DP+EP using GSM8K evaluation. Test DBO with DP+EP using GSM8K evaluation.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment