Commit 15ddd843 (unverified), authored by fzyzcjy, committed by GitHub
Browse files

Add retry for flaky tests in CI (#4755)

parent 52029bd1
...@@ -7,6 +7,7 @@ from sglang.srt.utils import kill_process_tree ...@@ -7,6 +7,7 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
...@@ -18,7 +19,7 @@ The story should span multiple events, challenges, and character developments ov ...@@ -18,7 +19,7 @@ The story should span multiple events, challenges, and character developments ov
""" """
class TestMatchedStop(unittest.TestCase): class TestMatchedStop(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEnableMetrics(unittest.TestCase): class TestEnableMetrics(CustomTestCase):
def test_metrics_enabled(self): def test_metrics_enabled(self):
"""Test that metrics endpoint returns data when enabled""" """Test that metrics endpoint returns data when enabled"""
process = popen_launch_server( process = popen_launch_server(
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLA(unittest.TestCase): class TestMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k ...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLADeepseekV3(unittest.TestCase): class TestMLADeepseekV3(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
...@@ -48,7 +49,7 @@ class TestMLADeepseekV3(unittest.TestCase): ...@@ -48,7 +49,7 @@ class TestMLADeepseekV3(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTP(unittest.TestCase): class TestDeepseekV3MTP(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
......
...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k ...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestFlashinferMLA(unittest.TestCase): class TestFlashinferMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
...@@ -55,7 +56,7 @@ class TestFlashinferMLA(unittest.TestCase): ...@@ -55,7 +56,7 @@ class TestFlashinferMLA(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestFlashinferMLANoRagged(unittest.TestCase): class TestFlashinferMLANoRagged(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
...@@ -99,7 +100,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase): ...@@ -99,7 +100,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestFlashinferMLAMTP(unittest.TestCase): class TestFlashinferMLAMTP(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST, DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLA(unittest.TestCase): class TestMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST
......
...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k ...@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLADeepseekV3ChannelInt8(unittest.TestCase): class TestMLADeepseekV3ChannelInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
...@@ -48,7 +49,7 @@ class TestMLADeepseekV3ChannelInt8(unittest.TestCase): ...@@ -48,7 +49,7 @@ class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTPChannelInt8(unittest.TestCase): class TestDeepseekV3MTPChannelInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
...@@ -109,7 +110,7 @@ class TestDeepseekV3MTPChannelInt8(unittest.TestCase): ...@@ -109,7 +110,7 @@ class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
self.assertGreater(avg_spec_accept_length, 2.5) self.assertGreater(avg_spec_accept_length, 2.5)
class TestMLADeepseekV3BlockInt8(unittest.TestCase): class TestMLADeepseekV3BlockInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
...@@ -144,7 +145,7 @@ class TestMLADeepseekV3BlockInt8(unittest.TestCase): ...@@ -144,7 +145,7 @@ class TestMLADeepseekV3BlockInt8(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTPBlockInt8(unittest.TestCase): class TestDeepseekV3MTPBlockInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
......
...@@ -8,11 +8,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k ...@@ -8,11 +8,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDeepseekTP2(unittest.TestCase): class TestDeepseekTP2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
......
...@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.modelopt_quant import ( ...@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.modelopt_quant import (
ModelOptFp8Config, ModelOptFp8Config,
ModelOptFp8KVCacheMethod, ModelOptFp8KVCacheMethod,
) )
from sglang.test.test_utils import CustomTestCase
class TestModelOptFp8KVCacheMethod(unittest.TestCase): class TestModelOptFp8KVCacheMethod(CustomTestCase):
def test_kv_cache_method_initialization(self): def test_kv_cache_method_initialization(self):
"""Test that ModelOptFp8KVCacheMethod can be instantiated and """Test that ModelOptFp8KVCacheMethod can be instantiated and
inherits from BaseKVCacheMethod.""" inherits from BaseKVCacheMethod."""
......
...@@ -5,9 +5,10 @@ import unittest ...@@ -5,9 +5,10 @@ import unittest
from unittest import mock from unittest import mock
from sglang.srt.utils import prepare_model_and_tokenizer from sglang.srt.utils import prepare_model_and_tokenizer
from sglang.test.test_utils import CustomTestCase
class TestDownloadFromModelScope(unittest.TestCase): class TestDownloadFromModelScope(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDeepEPMoE(unittest.TestCase): class TestDeepEPMoE(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEpMoE(unittest.TestCase): class TestEpMoE(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
...@@ -59,7 +60,7 @@ class TestEpMoE(unittest.TestCase): ...@@ -59,7 +60,7 @@ class TestEpMoE(unittest.TestCase):
self.assertGreater(metrics["score"], 0.8) self.assertGreater(metrics["score"], 0.8)
class TestEpMoEFP8(unittest.TestCase): class TestEpMoEFP8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
...@@ -12,13 +12,14 @@ from sglang.test.test_utils import ( ...@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
write_github_step_summary, write_github_step_summary,
) )
class TestMoEEvalAccuracyLarge(unittest.TestCase): class TestMoEEvalAccuracyLarge(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
......
...@@ -15,6 +15,7 @@ from sglang.test.test_utils import ( ...@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
write_github_step_summary, write_github_step_summary,
...@@ -129,7 +130,7 @@ def check_model_scores(results): ...@@ -129,7 +130,7 @@ def check_model_scores(results):
raise AssertionError("\n".join(failed_models)) raise AssertionError("\n".join(failed_models))
class TestNightlyGsm8KEval(unittest.TestCase): class TestNightlyGsm8KEval(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model_groups = [ cls.model_groups = [
......
...@@ -14,11 +14,12 @@ from sglang.test.test_utils import ( ...@@ -14,11 +14,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
) )
class TestNightlyHumanEval(unittest.TestCase): class TestNightlyHumanEval(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
if is_in_ci(): if is_in_ci():
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEvalAccuracyLarge(unittest.TestCase): class TestEvalAccuracyLarge(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
...@@ -2,12 +2,13 @@ import unittest ...@@ -2,12 +2,13 @@ import unittest
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
CustomTestCase,
run_bench_serving, run_bench_serving,
run_mmlu_test, run_mmlu_test,
) )
class TestNoChunkedPrefill(unittest.TestCase): class TestNoChunkedPrefill(CustomTestCase):
def test_no_chunked_prefill(self): def test_no_chunked_prefill(self):
run_mmlu_test( run_mmlu_test(
......
...@@ -6,10 +6,10 @@ python3 test_overlap_schedule.py ...@@ -6,10 +6,10 @@ python3 test_overlap_schedule.py
import unittest import unittest
from sglang.test.test_utils import run_mmlu_test from sglang.test.test_utils import CustomTestCase, run_mmlu_test
class TestOverlapSchedule(unittest.TestCase): class TestOverlapSchedule(CustomTestCase):
def test_no_radix_attention_chunked_prefill(self): def test_no_radix_attention_chunked_prefill(self):
run_mmlu_test( run_mmlu_test(
disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True
......
...@@ -18,11 +18,12 @@ from sglang.test.test_utils import ( ...@@ -18,11 +18,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestOpenAIServer(unittest.TestCase): class TestOpenAIServer(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...@@ -541,7 +542,7 @@ The SmartHome Mini is a compact smart home assistant available in black or white ...@@ -541,7 +542,7 @@ The SmartHome Mini is a compact smart home assistant available in black or white
# EBNF Test Class: TestOpenAIServerEBNF # EBNF Test Class: TestOpenAIServerEBNF
# Launches the server with xgrammar, has only EBNF tests # Launches the server with xgrammar, has only EBNF tests
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
class TestOpenAIServerEBNF(unittest.TestCase): class TestOpenAIServerEBNF(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...@@ -624,7 +625,7 @@ class TestOpenAIServerEBNF(unittest.TestCase): ...@@ -624,7 +625,7 @@ class TestOpenAIServerEBNF(unittest.TestCase):
) )
class TestOpenAIEmbedding(unittest.TestCase): class TestOpenAIEmbedding(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
......
...@@ -8,11 +8,12 @@ from sglang.test.test_utils import ( ...@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestPageSize(unittest.TestCase): class TestPageSize(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1" os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment