"src/vscode:/vscode.git/clone" did not exist on "4f00d5ac6fa408a9ca73141db5e8d0cbb1881d92"
Unverified commit 3ae33fcd authored by Xiaoyu Zhang, committed by GitHub

Fix hopper launch gpt-oss model illegal memory (#8908)

parent 500b15c9
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
     QuantizeMethodBase,
 )
 from sglang.srt.layers.quantization.utils import is_layer_skipped
+from sglang.srt.layers.utils import is_sm100_supported
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.utils import (
     direct_register_custom_op,
@@ -28,6 +29,7 @@ from sglang.srt.utils import (
     set_weight_attrs,
 )

+_is_sm100_supported = is_cuda() and is_sm100_supported()
 has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None
@@ -244,13 +246,17 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
         # pad the intermediate size to be a multiple of 2 * mxfp4_block
         # for to hold non-uniform sharded tensor as well as swizzling
-        if self.use_flashinfer:
-            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 256)
-            hidden_size = round_up(hidden_size, 256)
-        else:
-            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 64)
+        intermediate_size_per_partition_after_pad = intermediate_size
+        if _is_sm100_supported:
+            if self.use_flashinfer:
+                intermediate_size_per_partition_after_pad = round_up(
+                    intermediate_size, 256
+                )
+                hidden_size = round_up(hidden_size, 256)
+            else:
+                intermediate_size_per_partition_after_pad = round_up(
+                    intermediate_size, 64
+                )
+        elif is_hip():
+            intermediate_size_per_partition_after_pad = round_up(intermediate_size, 128)

         self.intermediate_size = intermediate_size_per_partition_after_pad
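For reviewers who want to reason about the new padding rule outside the diff, a minimal standalone sketch follows. It mirrors the branch structure introduced above, but the platform probes (is_cuda, is_hip, is_sm100_supported) are stubbed out and round_up is reimplemented locally, since the real helpers live in sglang.srt.utils and sglang.srt.layers.utils; the hidden_size rounding on the flashinfer path is also omitted. Treat it as an illustration of the control flow, not the actual SGLang module.

# Standalone sketch of the padding rule from this commit; the platform checks
# are stubbed so the snippet runs anywhere. In SGLang the real helpers come
# from sglang.srt.utils (is_cuda, is_hip, round_up) and
# sglang.srt.layers.utils (is_sm100_supported).


def round_up(x: int, multiple: int) -> int:
    # Smallest multiple of `multiple` that is >= x.
    return ((x + multiple - 1) // multiple) * multiple


def is_cuda() -> bool:  # stub: pretend we are on a Hopper (SM90) CUDA box
    return True


def is_hip() -> bool:  # stub: not a ROCm build
    return False


def is_sm100_supported() -> bool:  # stub: SM90 does not report SM100 support
    return False


# Evaluated once at module import time, as in the diff above.
_is_sm100_supported = is_cuda() and is_sm100_supported()


def pad_intermediate_size(intermediate_size: int, use_flashinfer: bool) -> int:
    """Pick the padded per-partition intermediate size for mxfp4 MoE weights.

    Before this commit the 256/64 padding was applied unconditionally; on
    Hopper that produced weight shapes the non-SM100 kernels do not expect,
    which is presumably what triggered the illegal memory access this commit
    fixes.
    """
    padded = intermediate_size  # default: leave the size untouched (Hopper path)
    if _is_sm100_supported:
        # SM100 (Blackwell): flashinfer kernels want 256-aligned sizes, otherwise 64.
        padded = round_up(intermediate_size, 256 if use_flashinfer else 64)
    elif is_hip():
        # ROCm: pad to a multiple of 128.
        padded = round_up(intermediate_size, 128)
    return padded


if __name__ == "__main__":
    # With the stubs above (Hopper-like), the size passes through unpadded.
    print(pad_intermediate_size(2880, use_flashinfer=False))  # -> 2880

Note that the gate is computed once at import time (_is_sm100_supported = is_cuda() and is_sm100_supported()), matching the module-level flag added in the second hunk, so the per-layer create_weights call only pays for a boolean check.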