feat: add gpt oss b200 ci (#9988)

de921733 · Yineng Zhang · GitHub · 397448eb · de921733 · de921733
Unverified commit de921733, authored Sep 03, 2025 by Yineng Zhang; committed by GitHub on Sep 03, 2025.
Showing 3 changed files with 5 additions and 10 deletions

scripts/ci/ci_install_dependency.sh +2 -2

test/srt/run_suite.py +1 -0

test/srt/test_gpt_oss_4gpu.py +2 -8

No files found.
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -47,8 +47,8 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
 if [ "$IS_BLACKWELL" = "1" ]; then
    # TODO auto determine sgl-kernel version
-    SGL_KERNEL_VERSION=0.3.2
+    SGL_KERNEL_VERSION=0.3.8
-    $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+    $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu128-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
 fi
 # Show current packages

--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -139,6 +139,7 @@ suites = {
    ],
    "per-commit-8-gpu-b200": [
        # add more here
+        TestFile("test_gpt_oss_4gpu.py", 600),
    ],
    "per-commit-4-gpu-deepep": [
        TestFile("ep/test_deepep_small.py", 531),

--- a/test/srt/test_gpt_oss_4gpu.py
+++ b/test/srt/test_gpt_oss_4gpu.py
@@ -9,10 +9,7 @@ class TestGptOss4Gpu(BaseTestGptOss):
            model_variant="120b",
            quantization="bf16",
            expected_score_of_reasoning_effort={
-                "low": 0.61,
+                "low": 0.60,
-                # remove to speed up
-                # "medium": 0.61,
-                # "high": 0.61,
            },
            other_args=["--tp", "4", "--cuda-graph-max-bs", "200"],
        )
@@ -22,10 +19,7 @@ class TestGptOss4Gpu(BaseTestGptOss):
            model_variant="120b",
            quantization="mxfp4",
            expected_score_of_reasoning_effort={
-                "low": 0.61,
+                "low": 0.60,
-                # remove to speed up
-                # "medium": 0.61,
-                # "high": 0.61,
            },
            other_args=[
                "--tp",