Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
15ddd843
"docs/source/Tuner/BatchTuner.rst" did not exist on "3ec26b40afd88b8255ebc74b46f475b77aa4d19b"
Unverified
Commit
15ddd843
authored
Mar 26, 2025
by
fzyzcjy
Committed by
GitHub
Mar 25, 2025
Browse files
Add retry for flaky tests in CI (#4755)
parent
52029bd1
Changes
112
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
69 additions
and
28 deletions
+69
-28
.github/workflows/execute-notebook.yml
.github/workflows/execute-notebook.yml
+1
-1
docs/Makefile
docs/Makefile
+2
-1
python/sglang/test/test_activation.py
python/sglang/test/test_activation.py
+2
-1
python/sglang/test/test_block_fp8.py
python/sglang/test/test_block_fp8.py
+5
-4
python/sglang/test/test_block_fp8_ep.py
python/sglang/test/test_block_fp8_ep.py
+2
-1
python/sglang/test/test_dynamic_grad_mode.py
python/sglang/test/test_dynamic_grad_mode.py
+2
-1
python/sglang/test/test_layernorm.py
python/sglang/test/test_layernorm.py
+3
-2
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+28
-0
test/lang/test_anthropic_backend.py
test/lang/test_anthropic_backend.py
+2
-1
test/lang/test_bind_cache.py
test/lang/test_bind_cache.py
+2
-2
test/lang/test_choices.py
test/lang/test_choices.py
+2
-1
test/lang/test_litellm_backend.py
test/lang/test_litellm_backend.py
+2
-1
test/lang/test_openai_backend.py
test/lang/test_openai_backend.py
+2
-1
test/lang/test_srt_backend.py
test/lang/test_srt_backend.py
+2
-2
test/lang/test_tracing.py
test/lang/test_tracing.py
+2
-1
test/lang/test_vertexai_backend.py
test/lang/test_vertexai_backend.py
+2
-1
test/srt/models/lora/test_lora.py
test/srt/models/lora/test_lora.py
+2
-1
test/srt/models/lora/test_lora_backend.py
test/srt/models/lora/test_lora_backend.py
+2
-2
test/srt/models/lora/test_lora_tp.py
test/srt/models/lora/test_lora_tp.py
+2
-2
test/srt/models/lora/test_multi_lora_backend.py
test/srt/models/lora/test_multi_lora_backend.py
+2
-2
No files found.
.github/workflows/execute-notebook.yml
View file @
15ddd843
...
...
@@ -33,7 +33,7 @@ jobs:
pip install -r docs/requirements.txt
apt-get update
apt-get install -y pandoc
apt-get update && apt-get install -y parallel
apt-get update && apt-get install -y parallel
retry
-
name
:
Setup Jupyter Kernel
run
:
|
...
...
docs/Makefile
View file @
15ddd843
...
...
@@ -23,7 +23,8 @@ compile:
parallel
-0
-j3
--halt
soon,fail
=
1
' \
NB_NAME=$
$(
basename
{
})
; \
START_TIME=$$(date +%s); \
jupyter nbconvert --to notebook --execute --inplace "{}" \
retry --delay=0 --times=3 -- \
jupyter nbconvert --to notebook --execute --inplace "{}" \
--ExecutePreprocessor.timeout=600 \
--ExecutePreprocessor.kernel_name=python3; \
RET_CODE=$$?; \
...
...
python/sglang/test/test_activation.py
View file @
15ddd843
...
...
@@ -4,9 +4,10 @@ import unittest
import
torch
from
sglang.srt.layers.activation
import
GeluAndMul
from
sglang.test.test_utils
import
CustomTestCase
class
TestGeluAndMul
(
unittest
.
TestCase
):
class
TestGeluAndMul
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
NUM_TOKENS
=
[
7
,
83
,
2048
]
D
=
[
512
,
4096
,
5120
,
13824
]
...
...
python/sglang/test/test_block_fp8.py
View file @
15ddd843
...
...
@@ -11,6 +11,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
static_quant_fp8
,
w8a8_block_fp8_matmul
,
)
from
sglang.test.test_utils
import
CustomTestCase
_is_cuda
=
torch
.
cuda
.
is_available
()
and
torch
.
version
.
cuda
...
...
@@ -44,7 +45,7 @@ def native_per_token_group_quant_fp8(
return
x_q
,
x_s
class
TestPerTokenGroupQuantFP8
(
unittest
.
TestCase
):
class
TestPerTokenGroupQuantFP8
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float32
]
NUM_TOKENS
=
[
7
,
83
,
2048
]
D
=
[
512
,
4096
,
5120
,
13824
]
...
...
@@ -111,7 +112,7 @@ def native_static_quant_fp8(x, x_s, dtype=torch.float8_e4m3fn):
return
x_q
,
x_s
class
TestStaticQuantFP8
(
unittest
.
TestCase
):
class
TestStaticQuantFP8
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float32
]
NUM_TOKENS
=
[
7
,
83
,
2048
]
D
=
[
512
,
4096
,
5120
,
13824
]
...
...
@@ -210,7 +211,7 @@ def native_w8a8_block_fp8_matmul(A, B, As, Bs, block_size, output_dtype=torch.fl
return
C
class
TestW8A8BlockFP8Matmul
(
unittest
.
TestCase
):
class
TestW8A8BlockFP8Matmul
(
Custom
TestCase
):
if
not
_is_cuda
:
OUT_DTYPES
=
[
torch
.
float32
,
torch
.
half
,
torch
.
bfloat16
]
...
...
@@ -331,7 +332,7 @@ def torch_w8a8_block_fp8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
).
sum
(
dim
=
1
)
class
TestW8A8BlockFP8FusedMoE
(
unittest
.
TestCase
):
class
TestW8A8BlockFP8FusedMoE
(
Custom
TestCase
):
DTYPES
=
[
torch
.
float32
,
torch
.
half
,
torch
.
bfloat16
]
M
=
[
1
,
33
,
64
,
222
,
1024
*
128
]
N
=
[
128
,
1024
,
2048
]
...
...
python/sglang/test/test_block_fp8_ep.py
View file @
15ddd843
...
...
@@ -13,6 +13,7 @@ from sglang.srt.layers.moe.ep_moe.kernels import (
silu_and_mul_triton_kernel
,
)
from
sglang.srt.layers.moe.topk
import
select_experts
from
sglang.test.test_utils
import
CustomTestCase
# For test
...
...
@@ -232,7 +233,7 @@ def block_dequant(
return
x_dq_block
class
TestW8A8BlockFP8EPMoE
(
unittest
.
TestCase
):
class
TestW8A8BlockFP8EPMoE
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
M
=
[
1
,
222
,
1024
,
2048
]
N
=
[
128
,
1024
,
2048
]
...
...
python/sglang/test/test_dynamic_grad_mode.py
View file @
15ddd843
...
...
@@ -3,9 +3,10 @@ import unittest
import
torch
from
sglang.srt.utils
import
DynamicGradMode
from
sglang.test.test_utils
import
CustomTestCase
class
TestDynamicGradMode
(
unittest
.
TestCase
):
class
TestDynamicGradMode
(
Custom
TestCase
):
def
test_inference
(
self
):
# Test inference_mode
DynamicGradMode
.
set_inference_mode
(
True
)
...
...
python/sglang/test/test_layernorm.py
View file @
15ddd843
...
...
@@ -4,9 +4,10 @@ import unittest
import
torch
from
sglang.srt.layers.layernorm
import
GemmaRMSNorm
,
RMSNorm
from
sglang.test.test_utils
import
CustomTestCase
class
TestRMSNorm
(
unittest
.
TestCase
):
class
TestRMSNorm
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
NUM_TOKENS
=
[
7
,
83
,
4096
]
HIDDEN_SIZES
=
[
768
,
769
,
770
,
771
,
5120
,
5124
,
5125
,
5126
,
8192
,
8199
]
...
...
@@ -56,7 +57,7 @@ class TestRMSNorm(unittest.TestCase):
self
.
_run_rms_norm_test
(
*
params
)
class
TestGemmaRMSNorm
(
unittest
.
TestCase
):
class
TestGemmaRMSNorm
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
NUM_TOKENS
=
[
7
,
83
,
4096
]
HIDDEN_SIZES
=
[
768
,
769
,
770
,
771
,
5120
,
5124
,
5125
,
5126
,
8192
,
8199
]
...
...
python/sglang/test/test_utils.py
View file @
15ddd843
...
...
@@ -8,6 +8,7 @@ import random
import
subprocess
import
threading
import
time
import
traceback
import
unittest
from
concurrent.futures
import
ThreadPoolExecutor
from
dataclasses
import
dataclass
...
...
@@ -998,3 +999,30 @@ def run_logprob_check(self: unittest.TestCase, arg: Tuple):
rank
+=
1
else
:
raise
class
CustomTestCase
(
unittest
.
TestCase
):
def
_callTestMethod
(
self
,
method
):
_retry_execution
(
lambda
:
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
),
max_retry
=
_get_max_retry
(),
)
def
_get_max_retry
():
return
int
(
os
.
environ
.
get
(
"SGLANG_TEST_MAX_RETRY"
,
"2"
if
is_in_ci
()
else
"0"
))
def
_retry_execution
(
fn
,
max_retry
:
int
):
if
max_retry
==
0
:
fn
()
return
try
:
fn
()
except
Exception
as
e
:
print
(
f
"retry_execution failed once and will retry. This may be an error or a flaky test. Error:
{
e
}
"
)
traceback
.
print_exc
()
_retry_execution
(
fn
,
max_retry
=
max_retry
-
1
)
test/lang/test_anthropic_backend.py
View file @
15ddd843
...
...
@@ -3,9 +3,10 @@ import unittest
from
sglang
import
Anthropic
,
set_default_backend
from
sglang.test.test_programs
import
test_mt_bench
,
test_stream
from
sglang.test.test_utils
import
CustomTestCase
class
TestAnthropicBackend
(
unittest
.
TestCase
):
class
TestAnthropicBackend
(
Custom
TestCase
):
backend
=
None
@
classmethod
...
...
test/lang/test_bind_cache.py
View file @
15ddd843
import
unittest
import
sglang
as
sgl
from
sglang.test.test_utils
import
DEFAULT_MODEL_NAME_FOR_TEST
from
sglang.test.test_utils
import
DEFAULT_MODEL_NAME_FOR_TEST
,
CustomTestCase
class
TestBind
(
unittest
.
TestCase
):
class
TestBind
(
Custom
TestCase
):
backend
=
None
@
classmethod
...
...
test/lang/test_choices.py
View file @
15ddd843
...
...
@@ -7,6 +7,7 @@ from sglang.lang.choices import (
token_length_normalized
,
unconditional_likelihood_normalized
,
)
from
sglang.test.test_utils
import
CustomTestCase
MOCK_CHOICES_INPUT_DATA
=
{
"choices"
:
[
...
...
@@ -51,7 +52,7 @@ MOCK_CHOICES_INPUT_DATA = {
}
class
TestChoices
(
unittest
.
TestCase
):
class
TestChoices
(
Custom
TestCase
):
def
test_token_length_normalized
(
self
):
"""Confirm 'antidisestablishmentarianism' is selected due to high confidences for
...
...
test/lang/test_litellm_backend.py
View file @
15ddd843
...
...
@@ -3,9 +3,10 @@ import unittest
from
sglang
import
LiteLLM
,
set_default_backend
from
sglang.test.test_programs
import
test_mt_bench
,
test_stream
from
sglang.test.test_utils
import
CustomTestCase
class
TestAnthropicBackend
(
unittest
.
TestCase
):
class
TestAnthropicBackend
(
Custom
TestCase
):
chat_backend
=
None
@
classmethod
...
...
test/lang/test_openai_backend.py
View file @
15ddd843
...
...
@@ -17,9 +17,10 @@ from sglang.test.test_programs import (
test_stream
,
test_tool_use
,
)
from
sglang.test.test_utils
import
CustomTestCase
class
TestOpenAIBackend
(
unittest
.
TestCase
):
class
TestOpenAIBackend
(
Custom
TestCase
):
instruct_backend
=
None
chat_backend
=
None
chat_vision_backend
=
None
...
...
test/lang/test_srt_backend.py
View file @
15ddd843
...
...
@@ -22,10 +22,10 @@ from sglang.test.test_programs import (
test_stream
,
test_tool_use
,
)
from
sglang.test.test_utils
import
DEFAULT_MODEL_NAME_FOR_TEST
from
sglang.test.test_utils
import
DEFAULT_MODEL_NAME_FOR_TEST
,
CustomTestCase
class
TestSRTBackend
(
unittest
.
TestCase
):
class
TestSRTBackend
(
Custom
TestCase
):
backend
=
None
@
classmethod
...
...
test/lang/test_tracing.py
View file @
15ddd843
...
...
@@ -3,9 +3,10 @@ import unittest
import
sglang
as
sgl
from
sglang.lang.backend.base_backend
import
BaseBackend
from
sglang.lang.chat_template
import
get_chat_template
from
sglang.test.test_utils
import
CustomTestCase
class
TestTracing
(
unittest
.
TestCase
):
class
TestTracing
(
Custom
TestCase
):
def
test_few_shot_qa
(
self
):
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
...
...
test/lang/test_vertexai_backend.py
View file @
15ddd843
...
...
@@ -10,9 +10,10 @@ from sglang.test.test_programs import (
test_parallel_encoding
,
test_stream
,
)
from
sglang.test.test_utils
import
CustomTestCase
class
TestVertexAIBackend
(
unittest
.
TestCase
):
class
TestVertexAIBackend
(
Custom
TestCase
):
backend
=
None
@
classmethod
...
...
test/srt/models/lora/test_lora.py
View file @
15ddd843
...
...
@@ -18,6 +18,7 @@ import unittest
import
torch
from
sglang.test.runners
import
HFRunner
,
SRTRunner
from
sglang.test.test_utils
import
CustomTestCase
LORA_SETS
=
[
# {
...
...
@@ -70,7 +71,7 @@ What do you know about llamas?
# PROMPTS.append(sample[0]["content"][:2000])
class
TestLoRA
(
unittest
.
TestCase
):
class
TestLoRA
(
Custom
TestCase
):
def
inference
(
self
,
prompts
,
lora_set
,
tp_size
,
torch_dtype
,
max_new_tokens
):
print
(
"=================== testing inference ======================="
)
...
...
test/srt/models/lora/test_lora_backend.py
View file @
15ddd843
...
...
@@ -21,7 +21,7 @@ import torch
from
utils
import
BACKENDS
,
TORCH_DTYPES
,
LoRAAdaptor
,
LoRAModelCase
from
sglang.test.runners
import
HFRunner
,
SRTRunner
from
sglang.test.test_utils
import
calculate_rouge_l
,
is_in_ci
from
sglang.test.test_utils
import
CustomTestCase
,
calculate_rouge_l
,
is_in_ci
CI_LORA_MODELS
=
[
LoRAModelCase
(
...
...
@@ -67,7 +67,7 @@ PROMPTS = [
]
class
TestLoRABackend
(
unittest
.
TestCase
):
class
TestLoRABackend
(
Custom
TestCase
):
def
run_backend
(
self
,
prompt
:
str
,
...
...
test/srt/models/lora/test_lora_tp.py
View file @
15ddd843
...
...
@@ -21,7 +21,7 @@ import torch
from
utils
import
TORCH_DTYPES
,
LoRAAdaptor
,
LoRAModelCase
from
sglang.test.runners
import
HFRunner
,
SRTRunner
from
sglang.test.test_utils
import
calculate_rouge_l
,
is_in_ci
from
sglang.test.test_utils
import
CustomTestCase
,
calculate_rouge_l
,
is_in_ci
CI_LORA_MODELS
=
[
LoRAModelCase
(
...
...
@@ -69,7 +69,7 @@ PROMPTS = [
BACKEND
=
"triton"
class
TestLoRATP
(
unittest
.
TestCase
):
class
TestLoRATP
(
Custom
TestCase
):
def
run_tp
(
self
,
prompt
:
str
,
...
...
test/srt/models/lora/test_multi_lora_backend.py
View file @
15ddd843
...
...
@@ -19,7 +19,7 @@ from typing import List
import
torch
from
utils
import
BACKENDS
,
TORCH_DTYPES
,
LoRAAdaptor
,
LoRAModelCase
from
sglang.test.test_utils
import
is_in_ci
from
sglang.test.test_utils
import
CustomTestCase
,
is_in_ci
MULTI_LORA_MODELS
=
[
LoRAModelCase
(
...
...
@@ -51,7 +51,7 @@ PROMPTS = [
]
class
TestMultiLoRABackend
(
unittest
.
TestCase
):
class
TestMultiLoRABackend
(
Custom
TestCase
):
def
run_backend_batch
(
self
,
prompts
:
List
[
str
],
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment