Unverified commit 15ddd843, authored by fzyzcjy and committed by GitHub
Browse files

Add retry for flaky tests in CI (#4755)

parent 52029bd1
......@@ -20,7 +20,7 @@ import torch
from transformers import AutoConfig, AutoTokenizer
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities, is_in_ci
from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci
MODELS = [
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
......@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16]
class TestEmbeddingModels(unittest.TestCase):
class TestEmbeddingModels(CustomTestCase):
@classmethod
def setUpClass(cls):
......
......@@ -33,7 +33,7 @@ from sglang.test.runners import (
SRTRunner,
check_close_model_outputs,
)
from sglang.test.test_utils import is_in_ci
from sglang.test.test_utils import CustomTestCase, is_in_ci
@dataclasses.dataclass
......@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
TORCH_DTYPES = [torch.float16]
class TestGenerationModels(unittest.TestCase):
class TestGenerationModels(CustomTestCase):
@classmethod
def setUpClass(cls):
......
......@@ -19,7 +19,7 @@ import unittest
import torch
from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities
from sglang.test.test_utils import CustomTestCase, get_similarities
TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
......@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16]
class TestQmeQwenModels(unittest.TestCase):
class TestQmeQwenModels(CustomTestCase):
@classmethod
def setUpClass(cls):
mp.set_start_method("spawn", force=True)
......
......@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestGrok(unittest.TestCase):
class TestGrok(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = "lmzheng/grok-1"
......
......@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestQwen2(unittest.TestCase):
class TestQwen2(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = "Qwen/Qwen2-7B-Instruct"
......@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.78)
class TestQwen2FP8(unittest.TestCase):
class TestQwen2FP8(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
......
......@@ -18,6 +18,7 @@ import unittest
import torch
from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase
MODELS = [
("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
......@@ -41,7 +42,7 @@ CONVS = [
]
class TestRewardModels(unittest.TestCase):
class TestRewardModels(CustomTestCase):
@classmethod
def setUpClass(cls):
......
......@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor
import requests
from sglang.test.test_utils import run_and_check_memory_leak
from sglang.test.test_utils import CustomTestCase, run_and_check_memory_leak
class TestAbort(unittest.TestCase):
class TestAbort(CustomTestCase):
def workload_func(self, base_url, model):
def process_func():
def run_one(_):
......
......@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestAWQ(unittest.TestCase):
class TestAWQ(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST
......
......@@ -3,6 +3,7 @@ import unittest
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
get_bool_env_var,
is_in_ci,
run_bench_one_batch,
......@@ -10,7 +11,7 @@ from sglang.test.test_utils import (
)
class TestBenchOneBatch(unittest.TestCase):
class TestBenchOneBatch(CustomTestCase):
def test_bs1(self):
output_throughput = run_bench_one_batch(
DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]
......
......@@ -6,13 +6,14 @@ from sglang.test.test_utils import (
DEFAULT_FP8_MODEL_NAME_FOR_TEST,
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
is_in_ci,
run_bench_serving,
write_github_step_summary,
)
class TestBenchServing(unittest.TestCase):
class TestBenchServing(CustomTestCase):
def test_offline_throughput_default(self):
res = run_bench_serving(
......
......@@ -5,6 +5,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.test.test_utils import CustomTestCase
# For test
......@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
).sum(dim=1)
class TestW8A8BlockINT8FusedMoE(unittest.TestCase):
class TestW8A8BlockINT8FusedMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16]
M = [1, 33, 64, 222]
N = [128, 1024]
......
......@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestCacheReport(unittest.TestCase):
class TestCacheReport(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
......@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p
import unittest
from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test
from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test
class TestChunkedPrefill(unittest.TestCase):
class TestChunkedPrefill(CustomTestCase):
def test_chunked_prefill(self):
run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)
......
......@@ -5,9 +5,10 @@ import numpy as np
import torch
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
from sglang.test.test_utils import CustomTestCase
class TestCreateKvIndices(unittest.TestCase):
class TestCreateKvIndices(CustomTestCase):
@classmethod
def setUpClass(cls):
if not torch.cuda.is_available():
......
......@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import (
graph_capture,
initialize_model_parallel,
)
from sglang.test.test_utils import CustomTestCase
def get_open_port() -> int:
......@@ -54,7 +55,7 @@ def multi_process_parallel(
ray.shutdown()
class TestCustomAllReduce(unittest.TestCase):
class TestCustomAllReduce(CustomTestCase):
@classmethod
def setUpClass(cls):
random.seed(42)
......
......@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestDataParallelism(unittest.TestCase):
class TestDataParallelism(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
......@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestDoubleSparsity(unittest.TestCase):
class TestDoubleSparsity(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
......@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestDPAttentionDP2TP2(unittest.TestCase):
class TestDPAttentionDP2TP2(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
......@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
run_logprob_check,
)
......@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2
decode_tolerance: float = 5e-2
class TestEAGLEEngine(unittest.TestCase):
class TestEAGLEEngine(CustomTestCase):
BASE_CONFIG = {
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
......@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine):
NUM_CONFIGS = 1
class TestEAGLEServer(unittest.TestCase):
class TestEAGLEServer(CustomTestCase):
PROMPTS = [
"[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
'[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',
......
......@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
......@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
)
class TestEBNFConstrained(unittest.TestCase):
class TestEBNFConstrained(CustomTestCase):
@classmethod
def setUpClass(cls):
setup_class(cls, "xgrammar", disable_overlap=False)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment