Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9adf178c
Unverified
Commit
9adf178c
authored
Mar 30, 2025
by
Lianmin Zheng
Committed by
GitHub
Mar 30, 2025
Browse files
Fix 2-gpu CI test and suppress some warnings (#4930)
parent
f842853a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
21 additions
and
19 deletions
+21
-19
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+5
-3
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+4
-4
test/srt/run_suite.py
test/srt/run_suite.py
+11
-11
test/srt/test_eagle_infer.py
test/srt/test_eagle_infer.py
+1
-1
No files found.
python/sglang/srt/models/deepseek_v2.py
View file @
9adf178c
...
...
@@ -30,9 +30,6 @@ from sglang.srt.distributed import (
tensor_model_parallel_all_reduce
,
)
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope
import
(
decode_attention_fwd_grouped_rope
,
)
from
sglang.srt.layers.dp_attention
import
(
dp_gather_partial
,
dp_scatter
,
...
...
@@ -83,6 +80,11 @@ if _is_cuda:
else
:
from
vllm
import
_custom_ops
as
ops
if
_is_hip
:
from
sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope
import
(
decode_attention_fwd_grouped_rope
,
)
expert_distribution_recorder
=
ExpertDistributionRecorder
()
...
...
python/sglang/srt/utils.py
View file @
9adf178c
...
...
@@ -564,6 +564,10 @@ def load_image(image_file: Union[str, bytes]) -> tuple[Image, tuple[int, int]]:
def
suppress_other_loggers
():
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
message
=
"The given NumPy array is not writable"
)
try
:
from
vllm.logger
import
logger
as
vllm_default_logger
except
ImportError
:
...
...
@@ -578,10 +582,6 @@ def suppress_other_loggers():
)
logging
.
getLogger
(
"vllm.config"
).
setLevel
(
logging
.
ERROR
)
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
message
=
"The given NumPy array is not writable"
)
def
assert_pkg_version
(
pkg
:
str
,
min_version
:
str
,
message
:
str
):
try
:
...
...
test/srt/run_suite.py
View file @
9adf178c
...
...
@@ -15,7 +15,7 @@ suites = {
"per-commit"
:
[
TestFile
(
"models/lora/test_lora.py"
,
76
),
TestFile
(
"models/lora/test_lora_backend.py"
,
420
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
144
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_embedding_models.py"
,
35
),
TestFile
(
"models/test_generation_models.py"
,
103
),
TestFile
(
"models/test_grok_models.py"
,
60
),
...
...
@@ -28,15 +28,15 @@ suites = {
TestFile
(
"test_chunked_prefill.py"
,
336
),
TestFile
(
"test_eagle_infer.py"
,
500
),
TestFile
(
"test_ebnf_constrained.py"
),
TestFile
(
"test_fp8_kernel.py"
,
2
),
TestFile
(
"test_fp8_kernel.py"
,
8
),
TestFile
(
"test_embedding_openai_server.py"
,
36
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_int8_kernel.py"
,
1
),
TestFile
(
"test_int8_kernel.py"
,
8
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_json_constrained.py"
,
98
),
TestFile
(
"test_large_max_new_tokens.py"
,
41
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_mla.py"
,
9
2
),
TestFile
(
"test_mla.py"
,
16
2
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
522
),
TestFile
(
"test_mla_flashinfer.py"
,
395
),
...
...
@@ -68,23 +68,23 @@ suites = {
TestFile
(
"test_vertex_endpoint.py"
,
31
),
TestFile
(
"test_vision_chunked_prefill.py"
,
223
),
TestFile
(
"test_vlm_accuracy.py"
,
60
),
TestFile
(
"test_vision_openai_server.py"
,
344
),
TestFile
(
"test_fim_completion.py"
,
12
0
),
TestFile
(
"test_vision_openai_server.py"
,
537
),
TestFile
(
"test_fim_completion.py"
,
4
0
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
172
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_hicache.py"
,
60
),
TestFile
(
"test_hicache_mla.py"
,
90
),
],
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
300
),
TestFile
(
"test_data_parallelism.py"
,
90
),
TestFile
(
"test_dp_attention.py"
,
90
),
TestFile
(
"test_mla_tp.py"
,
420
),
TestFile
(
"test_moe_ep.py"
,
220
),
TestFile
(
"test_patch_torch.py"
,
30
),
TestFile
(
"test_update_weights_from_distributed.py"
,
100
),
TestFile
(
"test_verl_engine.py"
,
100
),
TestFile
(
"test_patch_torch.py"
,
30
),
TestFile
(
"test_moe_ep.py"
,
220
),
TestFile
(
"test_mla_tp.py"
,
420
),
TestFile
(
"test_lora_tp.py"
,
300
),
],
"nightly"
:
[
TestFile
(
"test_nightly_gsm8k_eval.py"
),
...
...
test/srt/test_eagle_infer.py
View file @
9adf178c
...
...
@@ -567,7 +567,7 @@ class TestEAGLEServerPageSize(TestEAGLEServer):
"--max-running-requests"
,
8
,
"--page-size"
,
4
,
8
,
],
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment