Unverified Commit 15ddd843 authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Add retry for flaky tests in CI (#4755)

parent 52029bd1
......@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestOpenAIServer(unittest.TestCase):
class TestOpenAIServer(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = "intfloat/e5-mistral-7b-instruct"
......
......@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci,
popen_launch_server,
write_github_step_summary,
)
class TestEvalAccuracyLarge(unittest.TestCase):
class TestEvalAccuracyLarge(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
......@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestEvalFP8Accuracy(unittest.TestCase):
class TestEvalFP8Accuracy(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
......@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
self.assertGreaterEqual(metrics["score"], 0.61)
class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):
def _run_test(self, model, other_args, expected_score):
base_url = DEFAULT_URL_FOR_TEST
......@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
)
class TestEvalFP8ModelOptQuantAccuracy(unittest.TestCase):
class TestEvalFP8ModelOptQuantAccuracy(CustomTestCase):
def _run_test(self, model, other_args, expected_score):
base_url = DEFAULT_URL_FOR_TEST
......
......@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestExpertDistribution(unittest.TestCase):
class TestExpertDistribution(CustomTestCase):
def setUp(self):
# Clean up any existing expert distribution files before each test
for f in glob.glob("expert_distribution_*.csv"):
......
......@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestFimCompletion(unittest.TestCase):
class TestFimCompletion(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = "deepseek-ai/deepseek-coder-1.3b-base"
......
......@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
per_token_group_quant_fp8,
w8a8_block_fp8_matmul,
)
from sglang.test.test_utils import CustomTestCase
class TestFP8Base(unittest.TestCase):
class TestFP8Base(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.M = 256
......
......@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestFp8KvcacheBase(unittest.TestCase):
class TestFp8KvcacheBase(CustomTestCase):
model_config = None
@classmethod
......
......@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestOpenAIServerFunctionCalling(unittest.TestCase):
class TestOpenAIServerFunctionCalling(CustomTestCase):
@classmethod
def setUpClass(cls):
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
......@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.test.test_utils import CustomTestCase
class TestFusedMOE(unittest.TestCase):
class TestFusedMOE(CustomTestCase):
NUM_EXPERTS = [8, 64]
TOP_KS = [2, 6]
......
......@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci,
popen_launch_server,
)
......@@ -26,7 +27,7 @@ def _process_return(ret):
return np.array(ret)
class TestGetWeightsByName(unittest.TestCase):
class TestGetWeightsByName(CustomTestCase):
def init_hf_model(self, model_name, tie_word_embeddings):
self.hf_model = AutoModelForCausalLM.from_pretrained(
......
......@@ -3,9 +3,10 @@ import unittest
from huggingface_hub import hf_hub_download
import sglang as sgl
from sglang.test.test_utils import CustomTestCase
class TestGGUF(unittest.TestCase):
class TestGGUF(CustomTestCase):
def test_models(self):
prompt = "Today is a sunny day and I like"
sampling_params = {"temperature": 0, "max_new_tokens": 8}
......
......@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
......@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
# GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test GPTQ fallback kernel that is not Marlin
class TestGPTQModelDynamic(unittest.TestCase):
class TestGPTQModelDynamic(CustomTestCase):
MODEL_PATH = (
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
)
......@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
# GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test Marlin kernel
class TestGPTQModelDynamicWithMarlin(unittest.TestCase):
class TestGPTQModelDynamicWithMarlin(CustomTestCase):
MODEL_PATH = (
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
)
......
......@@ -3,11 +3,12 @@ import unittest
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestHealthCheck(unittest.TestCase):
class TestHealthCheck(CustomTestCase):
def test_health_check(self):
"""Test that metrics endpoint returns data when enabled"""
with self.assertRaises(TimeoutError):
......
......@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestPageSize(unittest.TestCase):
class TestPageSize(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
......
......@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestHierarchicalMLA(unittest.TestCase):
class TestHierarchicalMLA(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
......
......@@ -4,10 +4,10 @@ import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import sglang as sgl
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
class TestHiddenState(unittest.TestCase):
class TestHiddenState(CustomTestCase):
def test_return_hidden_states(self):
prompts = ["Today is", "Today is a sunny day and I like"]
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
......@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestInputEmbeds(unittest.TestCase):
class TestInputEmbeds(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
......@@ -6,6 +6,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
from sglang.test.test_utils import CustomTestCase
def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
......@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
).sum(dim=1)
class TestW8A8Int8FusedMoE(unittest.TestCase):
class TestW8A8Int8FusedMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16]
M = [1, 33]
N = [128, 1024]
......
......@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
......@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
)
class TestJSONConstrainedOutlinesBackend(unittest.TestCase):
class TestJSONConstrainedOutlinesBackend(CustomTestCase):
@classmethod
def setUpClass(cls):
setup_class(cls, backend="outlines")
......
......@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST,
STDERR_FILENAME,
STDOUT_FILENAME,
CustomTestCase,
popen_launch_server,
)
class TestLargeMaxNewTokens(unittest.TestCase):
class TestLargeMaxNewTokens(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment