Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
15ddd843
Unverified
Commit
15ddd843
authored
Mar 26, 2025
by
fzyzcjy
Committed by
GitHub
Mar 25, 2025
Browse files
Add retry for flaky tests in CI (#4755)
parent
52029bd1
Changes
112
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
43 additions
and
24 deletions
+43
-24
test/srt/test_embedding_openai_server.py
test/srt/test_embedding_openai_server.py
+2
-1
test/srt/test_eval_accuracy_large.py
test/srt/test_eval_accuracy_large.py
+2
-1
test/srt/test_eval_fp8_accuracy.py
test/srt/test_eval_fp8_accuracy.py
+4
-3
test/srt/test_expert_distribution.py
test/srt/test_expert_distribution.py
+2
-1
test/srt/test_fim_completion.py
test/srt/test_fim_completion.py
+2
-1
test/srt/test_fp8_kernel.py
test/srt/test_fp8_kernel.py
+2
-1
test/srt/test_fp8_kvcache.py
test/srt/test_fp8_kvcache.py
+2
-1
test/srt/test_function_calling.py
test/srt/test_function_calling.py
+2
-1
test/srt/test_fused_moe.py
test/srt/test_fused_moe.py
+2
-1
test/srt/test_get_weights_by_name.py
test/srt/test_get_weights_by_name.py
+2
-1
test/srt/test_gguf.py
test/srt/test_gguf.py
+2
-1
test/srt/test_gptqmodel_dynamic.py
test/srt/test_gptqmodel_dynamic.py
+3
-2
test/srt/test_health_check.py
test/srt/test_health_check.py
+2
-1
test/srt/test_hicache.py
test/srt/test_hicache.py
+2
-1
test/srt/test_hicache_mla.py
test/srt/test_hicache_mla.py
+2
-1
test/srt/test_hidden_states.py
test/srt/test_hidden_states.py
+2
-2
test/srt/test_input_embeddings.py
test/srt/test_input_embeddings.py
+2
-1
test/srt/test_int8_kernel.py
test/srt/test_int8_kernel.py
+2
-1
test/srt/test_json_constrained.py
test/srt/test_json_constrained.py
+2
-1
test/srt/test_large_max_new_tokens.py
test/srt/test_large_max_new_tokens.py
+2
-1
No files found.
test/srt/test_embedding_openai_server.py
View file @
15ddd843
...
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
...
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestOpenAIServer
(
unittest
.
TestCase
):
class
TestOpenAIServer
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
"intfloat/e5-mistral-7b-instruct"
cls
.
model
=
"intfloat/e5-mistral-7b-instruct"
...
...
test/srt/test_eval_accuracy_large.py
View file @
15ddd843
...
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
...
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
is_in_ci
,
is_in_ci
,
popen_launch_server
,
popen_launch_server
,
write_github_step_summary
,
write_github_step_summary
,
)
)
class
TestEvalAccuracyLarge
(
unittest
.
TestCase
):
class
TestEvalAccuracyLarge
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
...
...
test/srt/test_eval_fp8_accuracy.py
View file @
15ddd843
...
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
...
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestEvalFP8Accuracy
(
unittest
.
TestCase
):
class
TestEvalFP8Accuracy
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
cls
.
model
=
DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
...
@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
...
@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
self
.
assertGreaterEqual
(
metrics
[
"score"
],
0.61
)
self
.
assertGreaterEqual
(
metrics
[
"score"
],
0.61
)
class
TestEvalFP8DynamicQuantAccuracy
(
unittest
.
TestCase
):
class
TestEvalFP8DynamicQuantAccuracy
(
Custom
TestCase
):
def
_run_test
(
self
,
model
,
other_args
,
expected_score
):
def
_run_test
(
self
,
model
,
other_args
,
expected_score
):
base_url
=
DEFAULT_URL_FOR_TEST
base_url
=
DEFAULT_URL_FOR_TEST
...
@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
...
@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
)
)
class
TestEvalFP8ModelOptQuantAccuracy
(
unittest
.
TestCase
):
class
TestEvalFP8ModelOptQuantAccuracy
(
Custom
TestCase
):
def
_run_test
(
self
,
model
,
other_args
,
expected_score
):
def
_run_test
(
self
,
model
,
other_args
,
expected_score
):
base_url
=
DEFAULT_URL_FOR_TEST
base_url
=
DEFAULT_URL_FOR_TEST
...
...
test/srt/test_expert_distribution.py
View file @
15ddd843
...
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
...
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestExpertDistribution
(
unittest
.
TestCase
):
class
TestExpertDistribution
(
Custom
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
# Clean up any existing expert distribution files before each test
# Clean up any existing expert distribution files before each test
for
f
in
glob
.
glob
(
"expert_distribution_*.csv"
):
for
f
in
glob
.
glob
(
"expert_distribution_*.csv"
):
...
...
test/srt/test_fim_completion.py
View file @
15ddd843
...
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
...
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestFimCompletion
(
unittest
.
TestCase
):
class
TestFimCompletion
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-coder-1.3b-base"
cls
.
model
=
"deepseek-ai/deepseek-coder-1.3b-base"
...
...
test/srt/test_fp8_kernel.py
View file @
15ddd843
...
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
...
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
per_token_group_quant_fp8
,
per_token_group_quant_fp8
,
w8a8_block_fp8_matmul
,
w8a8_block_fp8_matmul
,
)
)
from
sglang.test.test_utils
import
CustomTestCase
class
TestFP8Base
(
unittest
.
TestCase
):
class
TestFP8Base
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
M
=
256
cls
.
M
=
256
...
...
test/srt/test_fp8_kvcache.py
View file @
15ddd843
...
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
...
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestFp8KvcacheBase
(
unittest
.
TestCase
):
class
TestFp8KvcacheBase
(
Custom
TestCase
):
model_config
=
None
model_config
=
None
@
classmethod
@
classmethod
...
...
test/srt/test_function_calling.py
View file @
15ddd843
...
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
...
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestOpenAIServerFunctionCalling
(
unittest
.
TestCase
):
class
TestOpenAIServerFunctionCalling
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...
...
test/srt/test_fused_moe.py
View file @
15ddd843
...
@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm
...
@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.moe.fused_moe_triton.fused_moe
import
fused_moe
from
sglang.srt.layers.moe.fused_moe_triton.fused_moe
import
fused_moe
from
sglang.test.test_utils
import
CustomTestCase
class
TestFusedMOE
(
unittest
.
TestCase
):
class
TestFusedMOE
(
Custom
TestCase
):
NUM_EXPERTS
=
[
8
,
64
]
NUM_EXPERTS
=
[
8
,
64
]
TOP_KS
=
[
2
,
6
]
TOP_KS
=
[
2
,
6
]
...
...
test/srt/test_get_weights_by_name.py
View file @
15ddd843
...
@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
...
@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
is_in_ci
,
is_in_ci
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -26,7 +27,7 @@ def _process_return(ret):
...
@@ -26,7 +27,7 @@ def _process_return(ret):
return
np
.
array
(
ret
)
return
np
.
array
(
ret
)
class
TestGetWeightsByName
(
unittest
.
TestCase
):
class
TestGetWeightsByName
(
Custom
TestCase
):
def
init_hf_model
(
self
,
model_name
,
tie_word_embeddings
):
def
init_hf_model
(
self
,
model_name
,
tie_word_embeddings
):
self
.
hf_model
=
AutoModelForCausalLM
.
from_pretrained
(
self
.
hf_model
=
AutoModelForCausalLM
.
from_pretrained
(
...
...
test/srt/test_gguf.py
View file @
15ddd843
...
@@ -3,9 +3,10 @@ import unittest
...
@@ -3,9 +3,10 @@ import unittest
from
huggingface_hub
import
hf_hub_download
from
huggingface_hub
import
hf_hub_download
import
sglang
as
sgl
import
sglang
as
sgl
from
sglang.test.test_utils
import
CustomTestCase
class
TestGGUF
(
unittest
.
TestCase
):
class
TestGGUF
(
Custom
TestCase
):
def
test_models
(
self
):
def
test_models
(
self
):
prompt
=
"Today is a sunny day and I like"
prompt
=
"Today is a sunny day and I like"
sampling_params
=
{
"temperature"
:
0
,
"max_new_tokens"
:
8
}
sampling_params
=
{
"temperature"
:
0
,
"max_new_tokens"
:
8
}
...
...
test/srt/test_gptqmodel_dynamic.py
View file @
15ddd843
...
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
...
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
...
@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
# GPTQ with Dynamic Per/Module Quantization Control
# GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test GPTQ fallback kernel that is not Marlin
# Test GPTQ fallback kernel that is not Marlin
class
TestGPTQModelDynamic
(
unittest
.
TestCase
):
class
TestGPTQModelDynamic
(
Custom
TestCase
):
MODEL_PATH
=
(
MODEL_PATH
=
(
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
)
)
...
@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
...
@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
# GPTQ with Dynamic Per/Module Quantization Control
# GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test Marlin kernel
# Test Marlin kernel
class
TestGPTQModelDynamicWithMarlin
(
unittest
.
TestCase
):
class
TestGPTQModelDynamicWithMarlin
(
Custom
TestCase
):
MODEL_PATH
=
(
MODEL_PATH
=
(
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
)
)
...
...
test/srt/test_health_check.py
View file @
15ddd843
...
@@ -3,11 +3,12 @@ import unittest
...
@@ -3,11 +3,12 @@ import unittest
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestHealthCheck
(
unittest
.
TestCase
):
class
TestHealthCheck
(
Custom
TestCase
):
def
test_health_check
(
self
):
def
test_health_check
(
self
):
"""Test that metrics endpoint returns data when enabled"""
"""Test that metrics endpoint returns data when enabled"""
with
self
.
assertRaises
(
TimeoutError
):
with
self
.
assertRaises
(
TimeoutError
):
...
...
test/srt/test_hicache.py
View file @
15ddd843
...
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
...
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestPageSize
(
unittest
.
TestCase
):
class
TestPageSize
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
...
...
test/srt/test_hicache_mla.py
View file @
15ddd843
...
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
...
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST
,
DEFAULT_MLA_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestHierarchicalMLA
(
unittest
.
TestCase
):
class
TestHierarchicalMLA
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MLA_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MLA_MODEL_NAME_FOR_TEST
...
...
test/srt/test_hidden_states.py
View file @
15ddd843
...
@@ -4,10 +4,10 @@ import torch
...
@@ -4,10 +4,10 @@ import torch
from
transformers
import
AutoModelForCausalLM
,
AutoTokenizer
from
transformers
import
AutoModelForCausalLM
,
AutoTokenizer
import
sglang
as
sgl
import
sglang
as
sgl
from
sglang.test.test_utils
import
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
from
sglang.test.test_utils
import
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
CustomTestCase
class
TestHiddenState
(
unittest
.
TestCase
):
class
TestHiddenState
(
Custom
TestCase
):
def
test_return_hidden_states
(
self
):
def
test_return_hidden_states
(
self
):
prompts
=
[
"Today is"
,
"Today is a sunny day and I like"
]
prompts
=
[
"Today is"
,
"Today is a sunny day and I like"
]
model_path
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
model_path
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...
...
test/srt/test_input_embeddings.py
View file @
15ddd843
...
@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
...
@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestInputEmbeds
(
unittest
.
TestCase
):
class
TestInputEmbeds
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...
...
test/srt/test_int8_kernel.py
View file @
15ddd843
...
@@ -6,6 +6,7 @@ import torch
...
@@ -6,6 +6,7 @@ import torch
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.moe.fused_moe_triton.fused_moe
import
fused_moe
from
sglang.srt.layers.moe.fused_moe_triton.fused_moe
import
fused_moe
from
sglang.srt.layers.quantization.int8_kernel
import
per_token_quant_int8
from
sglang.srt.layers.quantization.int8_kernel
import
per_token_quant_int8
from
sglang.test.test_utils
import
CustomTestCase
def
native_w8a8_per_token_matmul
(
A
,
B
,
As
,
Bs
,
output_dtype
=
torch
.
float16
):
def
native_w8a8_per_token_matmul
(
A
,
B
,
As
,
Bs
,
output_dtype
=
torch
.
float16
):
...
@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
...
@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
).
sum
(
dim
=
1
)
).
sum
(
dim
=
1
)
class
TestW8A8Int8FusedMoE
(
unittest
.
TestCase
):
class
TestW8A8Int8FusedMoE
(
Custom
TestCase
):
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
]
M
=
[
1
,
33
]
M
=
[
1
,
33
]
N
=
[
128
,
1024
]
N
=
[
128
,
1024
]
...
...
test/srt/test_json_constrained.py
View file @
15ddd843
...
@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
...
@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
...
@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
)
)
class
TestJSONConstrainedOutlinesBackend
(
unittest
.
TestCase
):
class
TestJSONConstrainedOutlinesBackend
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
setup_class
(
cls
,
backend
=
"outlines"
)
setup_class
(
cls
,
backend
=
"outlines"
)
...
...
test/srt/test_large_max_new_tokens.py
View file @
15ddd843
...
@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
...
@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
STDERR_FILENAME
,
STDERR_FILENAME
,
STDOUT_FILENAME
,
STDOUT_FILENAME
,
CustomTestCase
,
popen_launch_server
,
popen_launch_server
,
)
)
class
TestLargeMaxNewTokens
(
unittest
.
TestCase
):
class
TestLargeMaxNewTokens
(
Custom
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment