Use public model for FA3 speculative decode testing (#5152)

fd5a55cf · Yubo Wang · GitHub · 804d9f2e · fd5a55cf
Unverified commit fd5a55cf, authored Apr 08, 2025 by Yubo Wang; committed by GitHub on Apr 08, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 4 deletions

test/srt/test_fa3.py (+2 -4)

No files found.
--- a/test/srt/test_fa3.py
+++ b/test/srt/test_fa3.py
@@ -7,8 +7,6 @@ import torch
 from sglang.srt.utils import get_device_sm, kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
-    DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
-    DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -125,7 +123,7 @@ class TestFlashAttention3MLA(BaseFlashAttentionTest):
 class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
    """Test FlashAttention3 with speculative decode enabled."""

-    model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
+    model = "meta-llama/Llama-3.1-8B-Instruct"

    @classmethod
    def get_server_args(cls):
@@ -137,7 +135,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
                "--speculative-algorithm",
                "EAGLE3",
                "--speculative-draft",
-                DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
+                "jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B",
                "--speculative-num-steps",
                "3",
                "--speculative-eagle-topk",