Add 8-GPU Test for Deepseek-V3 (#5691)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>

Add 8-GPU Test for Deepseek-V3 (#5691)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
f9fb33ef · Baizhou Zhang · GitHub · a38f6932 · f9fb33ef · f9fb33ef
Unverified Commit f9fb33ef authored Apr 27, 2025 by Baizhou Zhang Committed by GitHub Apr 27, 2025
Showing with 128 additions and 1 deletion

.github/workflows/pr-test.yml .github/workflows/pr-test.yml +1 -1

test/srt/run_suite.py test/srt/run_suite.py +1 -0

test/srt/test_full_deepseek_v3.py test/srt/test_full_deepseek_v3.py +126 -0

No files found.
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -102,7 +102,7 @@ jobs:
          bash scripts/ci_install_dependency.sh
      - name: Run test
-        timeout-minutes: 30
+        timeout-minutes: 40
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu

--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -93,6 +93,7 @@ suites = {
    ],
    "per-commit-8-gpu": [
        TestFile("test_local_attn.py", 250),
+        TestFile("test_full_deepseek_v3.py", 250),
    ],
    "nightly": [
        TestFile("test_nightly_gsm8k_eval.py"),

--- a/test/srt/test_full_deepseek_v3.py
+++ b/test/srt/test_full_deepseek_v3.py
+import unittest
+from types import SimpleNamespace
+import requests
+import torch
+from sglang.srt.utils import kill_process_tree
+from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
+from sglang.test.test_utils import (
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    is_in_ci,
+    popen_launch_server,
+    run_bench_one_batch,
+    write_github_step_summary,
+)
+FULL_DEEPSEEK_V3_MODEL_PATH = "deepseek-ai/DeepSeek-V3-0324"
+class TestDeepseekV3(CustomTestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = FULL_DEEPSEEK_V3_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        other_args = ["--trust-remote-code", "--tp", "8"]
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+    def test_gsm8k(self):
+        args = SimpleNamespace(
+            num_shots=8,
+            data_path=None,
+            num_questions=1400,
+            parallel=1400,
+            max_new_tokens=512,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),
+        )
+        metrics = run_eval_few_shot_gsm8k(args)
+        print(f"{metrics=}")
+        self.assertGreater(metrics["accuracy"], 0.94)
+class TestBenchOneBatch(CustomTestCase):
+    def test_bs1(self):
+        output_throughput = run_bench_one_batch(
+            FULL_DEEPSEEK_V3_MODEL_PATH,
+            ["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"],
+        )
+        print(f"output_throughput : {output_throughput:.2f} token/s")
+        if is_in_ci():
+            write_github_step_summary(
+                f"### test_bs1\n"
+                f"output_throughput : {output_throughput:.2f} token/s\n"
+            )
+            self.assertGreater(output_throughput, 70)
+class TestDeepseekV3MTP(CustomTestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = FULL_DEEPSEEK_V3_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        other_args = [
+            "--tp",
+            "8",
+            "--trust-remote-code",
+            "--speculative-algorithm",
+            "EAGLE",
+            "--speculative-draft",
+            "lmsys/DeepSeek-V3-0324-NextN",
+            "--speculative-num-steps",
+            "5",
+            "--speculative-eagle-topk",
+            "4",
+            "--speculative-num-draft-tokens",
+            "8",
+            "--mem-fraction-static",
+            "0.6",
+        ]
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+    def test_gsm8k(self):
+        requests.get(self.base_url + "/flush_cache")
+        args = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=200,
+            max_new_tokens=512,
+            parallel=128,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),
+        )
+        metrics = run_eval_few_shot_gsm8k(args)
+        print(f"{metrics=}")
+        self.assertGreater(metrics["accuracy"], 0.94)
+        server_info = requests.get(self.base_url + "/get_server_info")
+        avg_spec_accept_length = server_info.json()["avg_spec_accept_length"]
+        print(f"{avg_spec_accept_length=}")
+        self.assertGreater(avg_spec_accept_length, 3.2)
+if __name__ == "__main__":
+    unittest.main()