Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7ed8ba05
Unverified
Commit
7ed8ba05
authored
Oct 29, 2025
by
b8zhong
Committed by
GitHub
Oct 29, 2025
Browse files
[CI] Add Llama 3.1 8B FP4 to B200 CI (#12182)
parent
df08f346
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
60 additions
and
1 deletion
+60
-1
test/srt/run_suite.py
test/srt/run_suite.py
+2
-1
test/srt/test_llama31_fp4.py
test/srt/test_llama31_fp4.py
+58
-0
No files found.
test/srt/run_suite.py
View file @
7ed8ba05
...
@@ -180,9 +180,10 @@ suites = {
...
@@ -180,9 +180,10 @@ suites = {
TestFile
(
"test_disaggregation_pp.py"
,
140
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
],
"per-commit-4-gpu-b200"
:
[
"per-commit-4-gpu-b200"
:
[
TestFile
(
"test_deepseek_v3_fp4_4gpu.py"
,
3600
),
TestFile
(
"test_flash_attention_4.py"
,
300
),
TestFile
(
"test_flash_attention_4.py"
,
300
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
600
),
TestFile
(
"test_gpt_oss_4gpu.py"
,
600
),
TestFile
(
"test_
deepseek_v3_fp4_4gpu
.py"
,
3
6
00
),
TestFile
(
"test_
llama31_fp4
.py"
,
300
),
],
],
"per-commit-4-gpu-deepep"
:
[
"per-commit-4-gpu-deepep"
:
[
TestFile
(
"ep/test_deepep_small.py"
,
531
),
TestFile
(
"ep/test_deepep_small.py"
,
531
),
...
...
test/srt/test_llama31_fp4.py
0 → 100644
View file @
7ed8ba05
import
unittest
from
types
import
SimpleNamespace
from
urllib.parse
import
urlparse
from
sglang.srt.utils
import
get_device_sm
,
kill_process_tree
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
# Model identifier that the test class stores as `cls.model` and passes to
# `popen_launch_server` as the model to serve.
# NOTE(review): presumably a Hugging Face Hub repo id — confirm.
MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-FP4"
@unittest.skipIf(get_device_sm() < 100, "Test requires CUDA SM 100 or higher")
class TestLlama31FP4B200(unittest.TestCase):
    """Accuracy check for the FP4 Llama 3.1 8B checkpoint on a launched server.

    The whole class is skipped when ``get_device_sm()`` reports a value
    below 100. One server process is started for the class and shared by
    every test in it.
    """

    @classmethod
    def setUpClass(cls):
        """Start the server once and keep its handle on the class."""
        cls.model = MODEL_PATH
        cls.base_url = DEFAULT_URL_FOR_TEST

        # Extra command-line flags for the server, grouped flag-by-flag.
        server_flags = ["--trust-remote-code"]
        server_flags += ["--mem-fraction-static", "0.8"]
        server_flags += ["--quantization", "modelopt_fp4"]

        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

    @classmethod
    def tearDownClass(cls):
        """Terminate the server process started in ``setUpClass``."""
        server_pid = cls.process.pid
        kill_process_tree(server_pid)

    def test_gsm8k(self):
        # Run the few-shot GSM8K evaluation against the launched server and
        # require the reported accuracy to exceed 0.61.
        endpoint = urlparse(self.base_url)
        host = f"{endpoint.scheme}://{endpoint.hostname}"

        # The evaluation helper receives its settings as attributes on a
        # namespace object; host and port are split out of the base URL.
        eval_options = {
            "num_shots": 4,
            "data_path": None,
            "num_questions": 100,
            "max_new_tokens": 512,
            "parallel": 128,
            "host": host,
            "port": endpoint.port,
        }
        results = run_eval_few_shot_gsm8k(SimpleNamespace(**eval_options))
        print(results)

        self.assertGreater(results["accuracy"], 0.61)
# Run the tests in this module through unittest's command-line runner when
# the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment