Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3ae33fcd
Unverified
Commit
3ae33fcd
authored
Aug 08, 2025
by
Xiaoyu Zhang
Committed by
GitHub
Aug 07, 2025
Browse files
Fix hopper launch gpt-oss model illegal memory (#8908)
parent
500b15c9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
7 deletions
+13
-7
python/sglang/srt/layers/quantization/mxfp4.py
python/sglang/srt/layers/quantization/mxfp4.py
+13
-7
No files found.
python/sglang/srt/layers/quantization/mxfp4.py
View file @
3ae33fcd
...
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
     QuantizeMethodBase,
 )
 from sglang.srt.layers.quantization.utils import is_layer_skipped
+from sglang.srt.layers.utils import is_sm100_supported
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.utils import (
     direct_register_custom_op,
...
@@ -28,6 +29,7 @@ from sglang.srt.utils import (
     set_weight_attrs,
 )
+_is_sm100_supported = is_cuda() and is_sm100_supported()
 has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None
...
@@ -244,13 +246,17 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
         # pad the intermediate size to be a multiple of 2 * mxfp4_block
         # for to hold non-uniform sharded tensor as well as swizzling
-        if self.use_flashinfer:
-            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 256)
-            hidden_size = round_up(hidden_size, 256)
-        elif is_hip():
-            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 128)
-        else:
-            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 64)
+        intermediate_size_per_partition_after_pad = intermediate_size
+        if _is_sm100_supported:
+            if self.use_flashinfer:
+                intermediate_size_per_partition_after_pad = round_up(
+                    intermediate_size, 256
+                )
+                hidden_size = round_up(hidden_size, 256)
+            else:
+                intermediate_size_per_partition_after_pad = round_up(
+                    intermediate_size, 64
+                )
         self.intermediate_size = intermediate_size_per_partition_after_pad
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment