Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
e39628fd
Unverified
Commit
e39628fd
authored
Oct 29, 2025
by
Minglei Zhu
Committed by
GitHub
Oct 29, 2025
Browse files
[2/2] Deepseek deterministic: support deepseek v3 deterministic inference on 8 x H200 (#12095)
parent
bacb3825
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
0 deletions
+19
-0
python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py
...rt/layers/moe/fused_moe_triton/fused_moe_triton_config.py
+14
-0
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+3
-0
test/srt/test_fused_moe.py
test/srt/test_fused_moe.py
+2
-0
No files found.
python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py
View file @
e39628fd
...
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple
...
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple
import
torch
import
torch
import
triton
import
triton
from
sglang.srt.server_args
import
get_global_server_args
from
sglang.srt.utils
import
get_device_name
,
is_hip
from
sglang.srt.utils
import
get_device_name
,
is_hip
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -51,6 +52,11 @@ def get_moe_configs(
...
@@ -51,6 +52,11 @@ def get_moe_configs(
kernel on a given batch size bs, the closest batch size in the grid should
kernel on a given batch size bs, the closest batch size in the grid should
be picked and the associated configuration chosen to invoke the kernel.
be picked and the associated configuration chosen to invoke the kernel.
"""
"""
if
get_global_server_args
().
enable_deterministic_inference
:
logger
.
warning
(
"Deterministic inference is enabled, using default MoE kernel config."
)
return
None
# Supported Triton versions, should be sorted from the newest to the oldest
# Supported Triton versions, should be sorted from the newest to the oldest
supported_triton_versions
=
[
"3.4.0"
,
"3.3.1"
,
"3.2.0"
,
"3.1.0"
]
supported_triton_versions
=
[
"3.4.0"
,
"3.3.1"
,
"3.2.0"
,
"3.1.0"
]
...
@@ -130,6 +136,14 @@ def get_default_config(
...
@@ -130,6 +136,14 @@ def get_default_config(
is_marlin
:
bool
,
is_marlin
:
bool
,
block_shape
:
Optional
[
List
[
int
]]
=
None
,
block_shape
:
Optional
[
List
[
int
]]
=
None
,
)
->
Dict
[
str
,
int
]:
)
->
Dict
[
str
,
int
]:
if
get_global_server_args
().
enable_deterministic_inference
:
config
=
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
32
,
"GROUP_SIZE_M"
:
8
,
}
return
config
if
dtype
==
"fp8_w8a8"
:
if
dtype
==
"fp8_w8a8"
:
if
block_shape
is
None
:
if
block_shape
is
None
:
config
=
{
config
=
{
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
e39628fd
...
@@ -515,6 +515,9 @@ class MoEGate(nn.Module):
...
@@ -515,6 +515,9 @@ class MoEGate(nn.Module):
True
,
# is_vnni
True
,
# is_vnni
)
)
if
get_global_server_args
().
enable_deterministic_inference
:
return
F
.
linear
(
hidden_states
,
self
.
weight
,
None
)
# NOTE: For some unknown reason, router_gemm seems degrade accept length.
# NOTE: For some unknown reason, router_gemm seems degrade accept length.
if
(
if
(
_is_cuda
_is_cuda
...
...
test/srt/test_fused_moe.py
View file @
e39628fd
...
@@ -193,6 +193,8 @@ class TestFusedMOE(CustomTestCase):
...
@@ -193,6 +193,8 @@ class TestFusedMOE(CustomTestCase):
dtypes
=
[
torch
.
float16
,
torch
.
bfloat16
]
dtypes
=
[
torch
.
float16
,
torch
.
bfloat16
]
fp8_modes
=
[
False
,
True
]
fp8_modes
=
[
False
,
True
]
set_global_server_args_for_scheduler
(
ServerArgs
(
model_path
=
"dummy"
))
# Calculate total number of tests
# Calculate total number of tests
total_tests
=
(
total_tests
=
(
len
(
m_values
)
len
(
m_values
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment