Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3ae33fcd
"src/vscode:/vscode.git/clone" did not exist on "4f00d5ac6fa408a9ca73141db5e8d0cbb1881d92"
Unverified
Commit
3ae33fcd
authored
Aug 08, 2025
by
Xiaoyu Zhang
Committed by
GitHub
Aug 07, 2025
Browse files
Fix hopper launch gpt-oss model illegal memory (#8908)
parent
500b15c9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
7 deletions
+13
-7
python/sglang/srt/layers/quantization/mxfp4.py
python/sglang/srt/layers/quantization/mxfp4.py
+13
-7
No files found.
python/sglang/srt/layers/quantization/mxfp4.py
View file @
3ae33fcd
...
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
...
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
QuantizeMethodBase
,
QuantizeMethodBase
,
)
)
from
sglang.srt.layers.quantization.utils
import
is_layer_skipped
from
sglang.srt.layers.quantization.utils
import
is_layer_skipped
from
sglang.srt.layers.utils
import
is_sm100_supported
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.utils
import
(
from
sglang.srt.utils
import
(
direct_register_custom_op
,
direct_register_custom_op
,
...
@@ -28,6 +29,7 @@ from sglang.srt.utils import (
...
@@ -28,6 +29,7 @@ from sglang.srt.utils import (
set_weight_attrs
,
set_weight_attrs
,
)
)
_is_sm100_supported
=
is_cuda
()
and
is_sm100_supported
()
has_triton_kernels
=
importlib
.
util
.
find_spec
(
"triton_kernels"
)
is
not
None
has_triton_kernels
=
importlib
.
util
.
find_spec
(
"triton_kernels"
)
is
not
None
...
@@ -244,13 +246,17 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
...
@@ -244,13 +246,17 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
# pad the intermediate size to be a multiple of 2 * mxfp4_block
# pad the intermediate size to be a multiple of 2 * mxfp4_block
# for to hold non-uniform sharded tensor as well as swizzling
# for to hold non-uniform sharded tensor as well as swizzling
intermediate_size_per_partition_after_pad
=
intermediate_size
if
_is_sm100_supported
:
if
self
.
use_flashinfer
:
if
self
.
use_flashinfer
:
intermediate_size_per_partition_after_pad
=
round_up
(
intermediate_size
,
256
)
intermediate_size_per_partition_after_pad
=
round_up
(
intermediate_size
,
256
)
hidden_size
=
round_up
(
hidden_size
,
256
)
hidden_size
=
round_up
(
hidden_size
,
256
)
elif
is_hip
():
intermediate_size_per_partition_after_pad
=
round_up
(
intermediate_size
,
128
)
else
:
else
:
intermediate_size_per_partition_after_pad
=
round_up
(
intermediate_size
,
64
)
intermediate_size_per_partition_after_pad
=
round_up
(
intermediate_size
,
64
)
self
.
intermediate_size
=
intermediate_size_per_partition_after_pad
self
.
intermediate_size
=
intermediate_size_per_partition_after_pad
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment