Unverified commit 15ddd843, authored by fzyzcjy and committed by GitHub
Browse files

Add retry for flaky tests in CI (#4755)

parent 52029bd1
...@@ -20,7 +20,7 @@ import torch ...@@ -20,7 +20,7 @@ import torch
from transformers import AutoConfig, AutoTokenizer from transformers import AutoConfig, AutoTokenizer
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities, is_in_ci from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci
MODELS = [ MODELS = [
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5), ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
...@@ -31,7 +31,7 @@ MODELS = [ ...@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestEmbeddingModels(unittest.TestCase): class TestEmbeddingModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
......
...@@ -33,7 +33,7 @@ from sglang.test.runners import ( ...@@ -33,7 +33,7 @@ from sglang.test.runners import (
SRTRunner, SRTRunner,
check_close_model_outputs, check_close_model_outputs,
) )
from sglang.test.test_utils import is_in_ci from sglang.test.test_utils import CustomTestCase, is_in_ci
@dataclasses.dataclass @dataclasses.dataclass
...@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [ ...@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestGenerationModels(unittest.TestCase): class TestGenerationModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
......
...@@ -19,7 +19,7 @@ import unittest ...@@ -19,7 +19,7 @@ import unittest
import torch import torch
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities from sglang.test.test_utils import CustomTestCase, get_similarities
TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series." TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg" IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
...@@ -31,7 +31,7 @@ MODELS = [ ...@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestQmeQwenModels(unittest.TestCase): class TestQmeQwenModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
mp.set_start_method("spawn", force=True) mp.set_start_method("spawn", force=True)
......
...@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval ...@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestGrok(unittest.TestCase): class TestGrok(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmzheng/grok-1" cls.model = "lmzheng/grok-1"
......
...@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval ...@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestQwen2(unittest.TestCase): class TestQwen2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "Qwen/Qwen2-7B-Instruct" cls.model = "Qwen/Qwen2-7B-Instruct"
...@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase): ...@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.78) self.assertGreater(metrics["accuracy"], 0.78)
class TestQwen2FP8(unittest.TestCase): class TestQwen2FP8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8" cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
......
...@@ -18,6 +18,7 @@ import unittest ...@@ -18,6 +18,7 @@ import unittest
import torch import torch
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase
MODELS = [ MODELS = [
("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2), ("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
...@@ -41,7 +42,7 @@ CONVS = [ ...@@ -41,7 +42,7 @@ CONVS = [
] ]
class TestRewardModels(unittest.TestCase): class TestRewardModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
......
...@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor ...@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor
import requests import requests
from sglang.test.test_utils import run_and_check_memory_leak from sglang.test.test_utils import CustomTestCase, run_and_check_memory_leak
class TestAbort(unittest.TestCase): class TestAbort(CustomTestCase):
def workload_func(self, base_url, model): def workload_func(self, base_url, model):
def process_func(): def process_func():
def run_one(_): def run_one(_):
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST, DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestAWQ(unittest.TestCase): class TestAWQ(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST
......
...@@ -3,6 +3,7 @@ import unittest ...@@ -3,6 +3,7 @@ import unittest
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
get_bool_env_var, get_bool_env_var,
is_in_ci, is_in_ci,
run_bench_one_batch, run_bench_one_batch,
...@@ -10,7 +11,7 @@ from sglang.test.test_utils import ( ...@@ -10,7 +11,7 @@ from sglang.test.test_utils import (
) )
class TestBenchOneBatch(unittest.TestCase): class TestBenchOneBatch(CustomTestCase):
def test_bs1(self): def test_bs1(self):
output_throughput = run_bench_one_batch( output_throughput = run_bench_one_batch(
DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"] DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]
......
...@@ -6,13 +6,14 @@ from sglang.test.test_utils import ( ...@@ -6,13 +6,14 @@ from sglang.test.test_utils import (
DEFAULT_FP8_MODEL_NAME_FOR_TEST, DEFAULT_FP8_MODEL_NAME_FOR_TEST,
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
run_bench_serving, run_bench_serving,
write_github_step_summary, write_github_step_summary,
) )
class TestBenchServing(unittest.TestCase): class TestBenchServing(CustomTestCase):
def test_offline_throughput_default(self): def test_offline_throughput_default(self):
res = run_bench_serving( res = run_bench_serving(
......
...@@ -5,6 +5,7 @@ import torch ...@@ -5,6 +5,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.test.test_utils import CustomTestCase
# For test # For test
...@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape): ...@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
).sum(dim=1) ).sum(dim=1)
class TestW8A8BlockINT8FusedMoE(unittest.TestCase): class TestW8A8BlockINT8FusedMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
M = [1, 33, 64, 222] M = [1, 33, 64, 222]
N = [128, 1024] N = [128, 1024]
......
...@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree ...@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestCacheReport(unittest.TestCase): class TestCacheReport(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
...@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p ...@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p
import unittest import unittest
from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test
class TestChunkedPrefill(unittest.TestCase): class TestChunkedPrefill(CustomTestCase):
def test_chunked_prefill(self): def test_chunked_prefill(self):
run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False) run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)
......
...@@ -5,9 +5,10 @@ import numpy as np ...@@ -5,9 +5,10 @@ import numpy as np
import torch import torch
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
from sglang.test.test_utils import CustomTestCase
class TestCreateKvIndices(unittest.TestCase): class TestCreateKvIndices(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
if not torch.cuda.is_available(): if not torch.cuda.is_available():
......
...@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import ( ...@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import (
graph_capture, graph_capture,
initialize_model_parallel, initialize_model_parallel,
) )
from sglang.test.test_utils import CustomTestCase
def get_open_port() -> int: def get_open_port() -> int:
...@@ -54,7 +55,7 @@ def multi_process_parallel( ...@@ -54,7 +55,7 @@ def multi_process_parallel(
ray.shutdown() ray.shutdown()
class TestCustomAllReduce(unittest.TestCase): class TestCustomAllReduce(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
random.seed(42) random.seed(42)
......
...@@ -10,11 +10,12 @@ from sglang.test.test_utils import ( ...@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDataParallelism(unittest.TestCase): class TestDataParallelism(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
...@@ -8,11 +8,12 @@ from sglang.test.test_utils import ( ...@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDoubleSparsity(unittest.TestCase): class TestDoubleSparsity(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
...@@ -7,11 +7,12 @@ from sglang.test.test_utils import ( ...@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDPAttentionDP2TP2(unittest.TestCase): class TestDPAttentionDP2TP2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
...@@ -24,6 +24,7 @@ from sglang.test.test_utils import ( ...@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
run_logprob_check, run_logprob_check,
) )
...@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2 ...@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2
decode_tolerance: float = 5e-2 decode_tolerance: float = 5e-2
class TestEAGLEEngine(unittest.TestCase): class TestEAGLEEngine(CustomTestCase):
BASE_CONFIG = { BASE_CONFIG = {
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, "model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, "speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
...@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine): ...@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine):
NUM_CONFIGS = 1 NUM_CONFIGS = 1
class TestEAGLEServer(unittest.TestCase): class TestEAGLEServer(CustomTestCase):
PROMPTS = [ PROMPTS = [
"[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]" "[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
'[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]', '[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',
......
...@@ -15,6 +15,7 @@ from sglang.test.test_utils import ( ...@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
...@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool): ...@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
) )
class TestEBNFConstrained(unittest.TestCase): class TestEBNFConstrained(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
setup_class(cls, "xgrammar", disable_overlap=False) setup_class(cls, "xgrammar", disable_overlap=False)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment